otsdaq  v1_01_03
 All Classes Namespaces Functions
XmlDocument.cc
1 
2 #include "otsdaq-core/XmlUtilities/XmlDocument.h"
3 #include "otsdaq-core/XmlUtilities/ConvertToXML.h"
4 #include "otsdaq-core/XmlUtilities/ConvertFromXML.h"
5 #include "otsdaq-core/MessageFacility/MessageFacility.h"
6 #include "otsdaq-core/Macros/CoutHeaderMacros.h"
7 
8 #include <xercesc/parsers/XercesDOMParser.hpp>
9 #include <stdexcept>
10 #include <xercesc/dom/DOM.hpp>
11 #include <xercesc/dom/DOMDocument.hpp>
12 #include <xercesc/dom/DOMDocumentType.hpp>
13 #include <xercesc/dom/DOMElement.hpp>
14 #include <xercesc/dom/DOMImplementation.hpp>
15 #include <xercesc/dom/DOMImplementationRegistry.hpp>
16 #include <xercesc/dom/DOMImplementationLS.hpp>
17 //#include <xercesc/dom/DOMLSSerializer.hpp>
18 //#include <xercesc/dom/DOMLSOutput.hpp>
19 #include <xercesc/dom/DOMNodeIterator.hpp>
20 #include <xercesc/dom/DOMNodeList.hpp>
21 #include <xercesc/dom/DOMText.hpp>
22 #include <xercesc/validators/common/Grammar.hpp>
23 
24 #include <xercesc/parsers/XercesDOMParser.hpp>
25 #include <xercesc/util/XMLUni.hpp>
26 #include <xercesc/util/XercesDefs.hpp>
27 
28 #include <xercesc/util/OutOfMemoryException.hpp>
29 #include <xercesc/framework/LocalFileFormatTarget.hpp>
30 
31 #include <iostream>
32 #include <sstream>
33 #include <list>
34 
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <unistd.h>
38 #include <errno.h>
39 
40 using namespace ots;
41 
42 //==============================================================================
43 XmlDocument::XmlDocument(std::string rootName) :
44  rootTagName_(rootName)
45 {
46  INIT_MF("XmlDocument");
47  //__MOUT__ << "in" << std::endl;
48  initDocument();
49  rootElement_ = theDocument_->getDocumentElement();
50  //__MOUT__ << "out" << std::endl;
51 }
52 
53 //==============================================================================
54 XmlDocument::XmlDocument(const XmlDocument& doc) :
55  rootTagName_(doc.rootTagName_)
56 {
57  //__MOUT__ << "in" << std::endl;
58  *this = doc;
59  //__MOUT__ << "out" << std::endl;
60 }
61 
62 //==============================================================================
63 XmlDocument& XmlDocument::operator=(const XmlDocument& doc)
64 {
65  //__MOUT__ << "in" << std::endl;
66  initDocument();
67  rootElement_ = theDocument_->getDocumentElement();
68  recursiveElementCopy(doc.rootElement_, rootElement_);
69  //__MOUT__ << "out" << std::endl;
70  return *this;
71 }
72 
73 //==============================================================================
74 XmlDocument::~XmlDocument(void)
75 {
76  //std::cout << __COUT_HDR_FL__<< "Xml Destructor" << std::endl;
77  terminatePlatform();
78 }
79 
80 //==============================================================================
81 void XmlDocument::initDocument(void)
82 {
83  initPlatform();
84 
85  theImplementation_ = xercesc::DOMImplementationRegistry::getDOMImplementation(CONVERT_TO_XML("Core"));
86 
87  if(theImplementation_)
88  {
89  try
90  {
91  theDocument_ = theImplementation_->createDocument(
92  CONVERT_TO_XML("http://www.w3.org/2001/XMLSchema-instance"), // root element namespace URI.
93  CONVERT_TO_XML(rootTagName_), // root element name
94  0); // theDocument_ type object (DTD).
95  }
96  catch (const xercesc::OutOfMemoryException&)
97  {
98  XERCES_STD_QUALIFIER cerr << "OutOfMemoryException" << XERCES_STD_QUALIFIER endl;
99  }
100  catch (const xercesc::DOMException& e)
101  {
102  XERCES_STD_QUALIFIER cerr << "DOMException code is: " << e.code << XERCES_STD_QUALIFIER endl;
103  }
104  catch(const xercesc::XMLException& e)
105  {
106  __MOUT__ << "Error Message: " << XML_TO_CHAR(e.getMessage()) << std::endl;
107  }
108  catch (...)
109  {
110  XERCES_STD_QUALIFIER cerr << "An error occurred creating the theDocument_" << XERCES_STD_QUALIFIER endl;
111  }
112  }
113  else
114  XERCES_STD_QUALIFIER cerr << "Requested theImplementation_ is not supported" << XERCES_STD_QUALIFIER endl;
115 }
116 
117 //==============================================================================
118 void XmlDocument::initPlatform(void)
119 {
120  try
121  {
122  xercesc::XMLPlatformUtils::Initialize(); // Initialize Xerces infrastructure
123  //std::cout << __COUT_HDR_FL__<< "Initialized new theDocument_" << std::endl;
124  }
125  catch( xercesc::XMLException& e )
126  {
127  __MOUT__ << "XML toolkit initialization error: " << XML_TO_CHAR(e.getMessage()) << std::endl;
128  }
129 
130 }
131 
132 //==============================================================================
133 void XmlDocument::terminatePlatform(void)
134 {
135  try
136  {
137  //std::cout << __COUT_HDR_FL__<< "Releasing the document" << std::endl;
138  theDocument_->release();
139  //std::cout << __COUT_HDR_FL__<< "document released" << std::endl;
140  }
141  catch (...)
142  {
143  XERCES_STD_QUALIFIER cerr << "An error occurred destroying the theDocument_" << XERCES_STD_QUALIFIER endl;
144  }
145 
146  try
147  {
148  xercesc::XMLPlatformUtils::Terminate(); // Terminate after release of memory
149  }
150  catch( xercesc::XMLException& e )
151  {
152  __MOUT__ << "XML toolkit teardown error: " << XML_TO_CHAR(e.getMessage()) << std::endl;
153  //XMLString::release(&message);
154  }
155 }
156 
157 //==============================================================================
158 //addTextElementToParent
159 // add to parent by pointer to parent
160 // returns pointer to element that is added
161 xercesc::DOMElement* XmlDocument::addTextElementToParent(std::string childName, std::string childText, xercesc::DOMElement* parent)
162 {
163  if(parent == 0) return 0;
164  xercesc::DOMElement* child;
165  try
166  {
167  child = theDocument_->createElement(CONVERT_TO_XML(childName));
168  }
169  catch (xercesc::DOMException& e)
170  {
171  __MOUT__ << "Can't use the name: " << childName << " to create the child element because the exception says: "
172  << XML_TO_CHAR(e.getMessage()) << ". Very likely you have a name that starts with a number and that's not allowed!" << std::endl;
173  }
174  parent->appendChild(child);
175 
176  try
177  {
178  child->appendChild(theDocument_->createTextNode(CONVERT_TO_XML(childText)));
179  }
180  catch(...) //sometimes see TranscodingException
181  {
182  __MOUT_ERR__ << "Error caught attempting to create a text node for this text: " <<
183  childText << ". Converting instead to 'Illegal text..'" << std::endl;
184  child->appendChild(theDocument_->createTextNode(CONVERT_TO_XML("Illegal text content blocked.")));
185  }
186 
187  return child;
188 }
189 
190 //==============================================================================
191 //addTextElementToParent
192 // add to parent by instance number of parent name
193 // returns pointer to element that is added
194 xercesc::DOMElement* XmlDocument::addTextElementToParent(std::string childName, std::string childText, std::string parentName, unsigned int parentIndex)
195 {
196  xercesc::DOMNodeList* nodeList = theDocument_->getElementsByTagName(CONVERT_TO_XML(parentName));
197 
198  if(parentIndex >= nodeList->getLength())
199  {
200  __MOUT__ << "WARNING: Illegal parent index attempted in tags with name: " << parentName << ", index: " << parentIndex << std::endl;
201  return 0; //illegal index attempted
202  }
203 
204  return addTextElementToParent(childName, childText,(xercesc::DOMElement*)(nodeList->item(parentIndex)));
205 }
206 
207 //==============================================================================
208 void XmlDocument::copyDocument(const xercesc::DOMDocument* toCopy, xercesc::DOMDocument* copy)
209 {
210  recursiveElementCopy(toCopy->getDocumentElement(),copy->getDocumentElement());
211 }
212 
213 //==============================================================================
214 void XmlDocument::recursiveElementCopy(const xercesc::DOMElement* toCopy, xercesc::DOMElement* copy)
215 {
216  xercesc::DOMNodeList* nodeListToCopy = toCopy->getChildNodes(); //get all children of the list to copy
217  xercesc::DOMNode* iNode;
218  xercesc::DOMDocument* copyDocument = copy->getOwnerDocument();
219  for(unsigned int i=0; i<nodeListToCopy->getLength(); i++)
220  {
221  iNode = nodeListToCopy->item(i);
222  xercesc::DOMElement* child = copyDocument->createElement(iNode->getNodeName());
223  copy->appendChild(child);
224  if( iNode->getFirstChild() != 0 && iNode->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute
225  {
226  child->appendChild(copyDocument->createTextNode(child->getFirstChild()->getNodeValue()));
227  }
228  recursiveElementCopy((xercesc::DOMElement*)(iNode),child);
229  }
230 }
231 
232 //==============================================================================
233 //XmlDocument::addElementToParent
234 // Add field/value element to XML doc at parent
235 // On Success, The child index of the added element with respect to the parent is returned and can be used to add
236 // children to the new element
237 // On Failure, return -1
238 /*
239 unsigned int XmlDocument::addElementToParent(std::string field, std::string value, xercesc::DOMElement *parentEl, bool verbose)
240 {
241  DOMNodeList *nodeList = parentEl->getChildNodes(); //get all children
242 
243  if(verbose)
244  {
245  //display parent info
246  //std::cout << __COUT_HDR_FL__<< "Parent Name: " << XML_TO_CHAR(parentEl->getNodeName()) << " Field: " << field << " Value: " << value << std::endl;
247  if( parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE)
248  std::cout << __COUT_HDR_FL__<< "Parent's First Child Node Value: " << XML_TO_CHAR(parentEl->getFirstChild()->getNodeValue()) << std::endl;
249  }
250 
251  //add field/value element
252  DOMElement *newEl = theDocument_->createElement(CONVERT_TO_XML(field));
253  parentEl->appendChild(newEl);
254 
255  DOMText* valueStr = theDocument_->createTextNode(CONVERT_TO_XML(value));
256  newEl->appendChild(valueStr);
257 
258  if( parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE)
259  return nodeList->getLength() - 2; //return child index among parent's children, not counting first child text node
260  return nodeList->getLength() - 1; //return child index among parent's children
261 }
262 */
263 //==============================================================================
264 //XmlDocument::addDataElement
265 // Add field/value element to XML doc at parent which is returned from getElementsByTagName(parentName), entry number parentNameIndex
266 // On Success, The child index of the added element with respect to the parent is returned and can be used to add
267 // children to the new element
268 // On Failure, return -1
269 /*
270 unsigned int XmlDocument::addDataElement ( std::string field, std::string value, std::string parentName, unsigned int parentNameIndex)
271 {
272  DOMNodeList *nodeList = theDocument_->getElementsByTagName(CONVERT_TO_XML(parentName));
273 
274  if(parentNameIndex >= nodeList->getLength()) {
275  __MOUT__ << "illegal parent index attempted in tags with name: " << parentName << ", index: " << parentNameIndex << std::endl;
276  return -1; //illegal index attempted
277  }
278 
279  return addElementToParent(field,value,(DOMElement*)(nodeList->item(parentNameIndex)));
280 }
281 */
282 //==============================================================================
283 //XmlDocument::addDataElement
284 // Add field/value element to XML doc at parentIndexArray (with depth of parent indicated by parentIndexArraySize)
285 // If parentIndexArray = NULL, element is added with <DATA> parent
286 // otherwise, parentIndexArray indicates the parent within the node list for <DATA> where
287 // the element will be added
288 // On Success, The child index of the added element with respect to the parent is returned and can be used to add
289 // children to the new element
290 // On Failure, return -1
291 /*
292 unsigned int XmlDocument::addDataElement ( std::string field, std::string value, unsigned int *parentIndexArray, unsigned int parentIndexArraySize)
293 {
294 
295  //__MOUT__ << "field: " << field << ", value: " << value << ", parent: " << parentIndexArraySize << std::endl;
296 
297  DOMElement *parentEl = dataElement; // initialize parent to <DATA>
298 
299  if(parentIndexArray) //if there passed an array find parent relative to data element
300  {
301  //std::cout << __COUT_HDR_FL__<< "Using Parent Index Array" << std::endl;
302 
303  DOMNodeList *nodeList;
304 
305  //iterate through nested parents based on parentIndexArray
306  unsigned int tmpi,cntNotTxt;
307  for(unsigned int i=0;i<parentIndexArraySize;++i)
308  {
309  nodeList = parentEl->getChildNodes(); //get all children
310  cntNotTxt = 0;
311 
312  //get cntNotTxt to proper non text node
313  for(tmpi=0;tmpi<nodeList->getLength();++tmpi)
314  {
315  if(((DOMElement*)(nodeList->item(tmpi)))->getNodeType() == DOMNode::TEXT_NODE) continue; //skip text nodes
316 
317  if(cntNotTxt == parentIndexArray[i]) break; //at proper parent node!
318  ++cntNotTxt; //else look for next
319  }
320 
321  //in theory, only first child can be text - ignore text node children
322  //if(parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) ++tmpi;
323 
324  if(tmpi >= nodeList->getLength()) {
325  __MOUT__ << "illegal child index attempted in nested parents: " << parentIndexArray[i] << ", depth: " << i << ", tmpi: " << tmpi << std::endl;
326  return -1; //illegal child index attempted in nested parents
327  }
328 
329  parentEl = (DOMElement*)(nodeList->item(tmpi));
330  }
331  }
332 
333  return addElementToParent(field,value,parentEl);
334 }
335 */
336 //==============================================================================
337 //XmlDocument::addXmlData
338 // Append <DATA> from xmldoc to this XML doc
339 // On Success, The child index within <DATA> of the first element is returned
340 // On Failure, return -1
341 /*
342 unsigned int XmlDocument::addXmlData (XmlDocument *xmldoc)
343 {
344  //
345 
346  int retIndex = dataElement->getChildNodes()->getLength(); //will be index of first appended data element
347 
348  //add all first level child elements of data and recurse on them
349  DOMNodeList *nodeList = xmldoc->dataElement->getChildNodes(); //get all children within data
350  for(unsigned int i = 0; i<nodeList->getLength();++i)
351  {
352  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //ignore text node children
353  continue;
354 
355  recursiveAddElementToParent((DOMElement*)(nodeList->item(i)),dataElement);
356  }
357 
358  return retIndex;
359 }
360 */
361 //==============================================================================
362 //XmlDocument::recursiveAddElementToParent
363 // add currEl and its children tree to parentEl
364 /*
365 void XmlDocument::recursiveAddElementToParent (DOMElement *currEl, DOMElement *parentEl)
366 {
367 std::string field, value = "";
368 
369  //char *tmpField =
370  field = XML_TO_CHAR(currEl->getNodeName());//XML_TO_CHAR(currEl->getNodeName());
371  //field = tmpField;
372  //XMLString::release( &tmpField );
373 
374  if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute
375  value = escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
376 
377  //insert currEl
378  addElementToParent(field,value,parentEl);
379 
380  //insert rest of currEl tree
381  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children of currEl
382  for(unsigned int i = 0; i<nodeList->getLength();++i)
383  {
384  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //ignore text node children
385  continue;
386 
387  recursiveAddElementToParent((DOMElement*)(nodeList->item(i)),currEl);
388  }
389 }
390 */
391 //==============================================================================
392 //XmlDocument::outputXmlDocument
393 // recurse through XML theDocument_ and std out and output to stream parameter if not null
394 void XmlDocument::outputXmlDocument (std::ostringstream *out, bool dispStdOut)
395 {
396  recursiveOutputXmlDocument(theDocument_->getDocumentElement(),out,dispStdOut);
397 }
398 
399 //==============================================================================
400 //XmlDocument::recursiveOutputXmlDocument
401 // recursively printout XML theDocument_ to std out and output stream if not null
402 void XmlDocument::recursiveOutputXmlDocument (xercesc::DOMElement *currEl, std::ostringstream *out, bool dispStdOut, std::string tabStr)
403 {
404  //open field tag
405  if(dispStdOut) std::cout << __COUT_HDR_FL__<< tabStr << "<" << XML_TO_CHAR(currEl->getNodeName()) ;
406  if(out) *out << tabStr << "<" << XML_TO_CHAR(currEl->getNodeName());
407 
408  //insert value if text node child
409  if( currEl->getFirstChild() != NULL &&
410  currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute
411  {
412  if(dispStdOut) std::cout << " value='" << (XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())) << "'";
413  if(out) *out << " value='" << (XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())) << "'";
414  }
415 
416  xercesc::DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
417 
418  //close opening field tag
419  if(dispStdOut) std::cout << ((nodeList->getLength() == 0 ||
420  (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))? "/":"")
421  << ">" << " len:" << nodeList->getLength() << std::endl;
422  if(out) *out << ((nodeList->getLength() == 0 ||
423  (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))? "/":"")
424  << ">" << std::endl;
425 
426  //insert children
427  std::string newTabStr = tabStr + "\t";
428  for(unsigned int i = 0; i<nodeList->getLength();++i)
429  if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE) //ignore text node children
430  recursiveOutputXmlDocument ((xercesc::DOMElement*)(nodeList->item(i)),out,dispStdOut,newTabStr);
431 
432  //close tag if children
433  if(nodeList->getLength() > 1 || (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() != xercesc::DOMNode::TEXT_NODE))
434  {
435  if(dispStdOut) std::cout << __COUT_HDR_FL__<< tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
436  if(out) *out << tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
437  }
438 }
439 
440 //==============================================================================
441 //XmlDocument::getDataElement
442 // returns the value for field found occurance number of times
443 // returns empty std::string "" if field was not found
444 /*
445 std::string XmlDocument::getDataElement (const std::string field, const unsigned int occurance)
446 {
447  unsigned int count = 0;
448  return recursiveFindElement(theDocument_->getDocumentElement(),field,occurance,count);
449 }
450 */
451 //==============================================================================
452 //XmlDocument::recursiveFindElement
453 // recursively searches and returns the value for field found occurance number of times
454 /*
455 std::string XmlDocument::recursiveFindElement (DOMElement *currEl, const std::string field, const unsigned int occurance, unsigned int &count)
456 {
457  if (XML_TO_CHAR(currEl->getNodeName()) == field && occurance == count++) //found, done!!
458  {
459  if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) //if has a text node first, return as value attribute
460  return escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
461  else
462  return ""; //empty value attribute
463  }
464 
465  std::string retStr;
466  //look through children recursively
467  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
468  for(unsigned int i = 0; i<nodeList->getLength();++i)
469  if(nodeList->item(i)->getNodeType() != DOMNode::TEXT_NODE) //ignore text node children
470  {
471  retStr = recursiveFindElement ((DOMElement*)(nodeList->item(i)),field,occurance,count);
472  if(retStr != "") return retStr; //found among children already, done
473  //else continue search within children recursively
474  }
475  return ""; //nothing found
476 }
477 */
478 //==============================================================================
479 //XmlDocument::getAllDataElements
480 // returns all of the values found for the field in a vector
481 // if none found vector will have size 0
482 /*
483 std::vector<std::string> XmlDocument::getAllDataElements (std::string field)
484 {
485  vector<string> retVec;
486 
487  recursiveFindAllElements(theDocument_->getDocumentElement(),field,&retVec);
488 
489  return retVec;
490 }
491 */
492 //==============================================================================
493 //XmlDocument::recursiveFindAllElements
494 // recursively searches and appends to retVec the value of every element found with name field
495 /*
496 void XmlDocument::recursiveFindAllElements (DOMElement *currEl, const std::string field,std::vector<std::string> *retVec)
497 {
498  if (XML_TO_CHAR(currEl->getNodeName()) == field &&
499  currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) //if has a text node first, return as value attribute
500  retVec->push_back(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
501 
502 
503  //look through children recursively
504  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
505  for(unsigned int i = 0; i<nodeList->getLength();++i)
506  if(nodeList->item(i)->getNodeType() != DOMNode::TEXT_NODE) //ignore text node children
507  recursiveFindAllElements ((DOMElement*)(nodeList->item(i)),field,retVec);
508 }
509 */
510 //==============================================================================
511 //XmlDocument::escapeString
512 // convert quotes to html quote characters &apos; = ' and &quot; = "
513 // remove new line characters
514 // and (if !allowWhiteSpace) remove white space (so that read from file white space artifact removed)
515 //
516 // convert &amp; = &
517 // if(allowWhiteSpace) convert \t to 8 &#160; spaces and \n to <br>
518 std::string XmlDocument::escapeString(std::string inString, bool allowWhiteSpace)
519 {
520  bool doit = false;
521 
522  unsigned int ws = -1;
523  char htmlTmp[6];
524 
525  for(unsigned int i=0; i<inString.length(); i++)
526  if(inString[i] != ' ')
527  {
528 
529 
530  if(doit) std::cout << __COUT_HDR_FL__<< inString[i] << ":" <<
531  (int)inString[i] << ":" << inString << std::endl;
532 
533  //remove new lines and unprintable characters
534  if(inString[i] == '\r' || inString[i] == '\n' ||//remove new line chars
535  inString[i] == '\t' || //remove tabs
536  inString[i] < 32 || //remove un-printable characters (they mess up xml interpretation)
537  (inString[i] > char(126) && inString[i] < char(161))) //this is aggravated by the bug in MFextensions (though Eric says he fixed on 8/24/2016)
538  //Note: greater than 255 should be impossible if by byte (but there are html chracters in 300s and 8000s)
539  {
540  if(allowWhiteSpace && //maintain new lines and tabs
541  inString[i] == '\n')
542  {
543  sprintf(htmlTmp,"&#%3.3d",inString[i]);
544  inString.insert(i,htmlTmp); //insert html str sequence
545  inString.replace(i+5,1,1,';'); // replace special character with ;
546  i+=6; //skip to next char to check
547  --i;
548  }
549  else if(allowWhiteSpace && //maintain new lines and tabs
550  inString[i] == '\t')
551  {
552  if(0)
553  {
554  //tab = 8 spaces
555  sprintf(htmlTmp,"&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160");
556  inString.insert(i,htmlTmp); //insert html str sequence
557  inString.replace(i+47,1,1,';'); // replace special character with ;
558  i+=48; //skip to next char to check
559  --i;
560  }
561  else //tab = 0x09
562  {
563 
564  sprintf(htmlTmp,"&#009");
565  inString.insert(i,htmlTmp); //insert html str sequence
566  inString.replace(i+5,1,1,';'); // replace special character with ;
567  i+=6; //skip to next char to check
568  --i;
569  }
570  }
571  else
572  {
573  inString.erase(i,1); //erase character
574  --i; //step back so next char to check is correct
575  }
576  if(doit) std::cout << __COUT_HDR_FL__<< inString << std::endl;
577  continue;
578  }
579 
580  if(doit) std::cout << __COUT_HDR_FL__<< inString << std::endl;
581 
582  //replace special characters
583  if(inString[i] == '\"' || inString[i] == '\'')
584  {
585  inString.insert(i,(inString[i] == '\'')?"&apos":"&quot"); //insert HTML name before quotes
586  inString.replace(i+5,1,1,';'); // replace special character with ;
587  i+=5; //skip to next char to check
588  //std::cout << __COUT_HDR_FL__<< inString << std::endl;
589  }
590  else if(inString[i] == '&')
591  {
592  inString.insert(i,"&amp"); //insert HTML name before special character
593  inString.replace(i+4,1,1,';'); // replace special character with ;
594  i+=4; //skip to next char to check
595  }
596  else if(inString[i] == '<' || inString[i] == '>')
597  {
598  inString.insert(i,(inString[i] == '<')?"&lt":"&gt"); //insert HTML name before special character
599  inString.replace(i+3,1,1,';'); // replace special character with ;
600  i+=3; //skip to next char to check
601  }
602  else if(inString[i] >= char(161) && inString[i] <= char(255)) //printable special characters
603  {
604  sprintf(htmlTmp,"&#%3.3d",inString[i]);
605  inString.insert(i,htmlTmp); //insert html number sequence
606  inString.replace(i+5,1,1,';'); // replace special character with ;
607  i+=5; //skip to next char to check
608  }
609 
610  if(doit) std::cout << __COUT_HDR_FL__<< inString << std::endl;
611 
612  ws = i; //last non white space char
613  }
614  else if(allowWhiteSpace) //keep white space if allowed
615  {
616  if(i-1 == ws) continue; //dont do anything for first white space
617 
618  //for second white space add 2, and 1 from then
619  if(i-2 == ws)
620  {
621  inString.insert(i,"&#160;"); //insert html space
622  i+=6; //skip to point at space again
623  }
624  inString.insert(i,"&#160"); //insert html space
625  inString.replace(i+5,1,1,';'); // replace special character with ;
626  i+=5; //skip to next char to check
627  ws = i;
628  }
629 
630  if(doit) std::cout << __COUT_HDR_FL__<< inString.size() << " " << ws << std::endl;
631 
632  inString.substr(0,ws+1);
633 
634  if(doit) std::cout << __COUT_HDR_FL__<< inString.size() << " " << inString << std::endl;
635 
636  if(ws == (unsigned int)-1) return ""; //empty std::string since all white space
637  return inString.substr(0,ws+1); //trim right white space
638 }
639 
640 
641 //==============================================================================
642 //XmlDocument::recursiveRemoveChild
643 // remove child and all of child's sub-tree from parent
644 void XmlDocument::recursiveRemoveChild(xercesc::DOMElement *childEl, xercesc::DOMElement *parentEl)
645 {
646  //release child's children first
647  xercesc::DOMNodeList* nodeList = childEl->getChildNodes(); //get all children within data
648  for(unsigned int i = 0; i<nodeList->getLength(); ++i)
649  recursiveRemoveChild((xercesc::DOMElement*)(nodeList->item(nodeList->getLength()-1-i)),childEl);
650 
651  //then release child
652  parentEl->removeChild(childEl);
653  childEl->release();
654 }
655 
656 //==============================================================================
657 //XmlDocument::saveXmlDocument
658 // wrapper for private outputXML
659 // Warning: filePath must be accessible or program will crash!
660 void XmlDocument::saveXmlDocument (std::string filePath)
661 {
662  std::cout << __COUT_HDR_FL__<< "Saving theDocument_ to file: " << filePath << std::endl;
663  //Return the first registered theImplementation_ that has the desired features. In this case, we are after a DOM theImplementation_ that has the LS feature... or Load/Save.
664  //DOMImplementation *theImplementation_ = DOMImplementationRegistry::getDOMImplementation(L"LS");
665  xercesc::DOMImplementation *saveImplementation = xercesc::DOMImplementationRegistry::getDOMImplementation(CONVERT_TO_XML("LS"));
666 
667  std::cout << __COUT_HDR_FL__<< "XERCES Version: " << _XERCES_VERSION << std::endl;
668 
669 #if _XERCES_VERSION >= 30000
670 
671  //std::cout << __COUT_HDR_FL__<< "making file" << filePath << std::endl;
672  // Create a DOMLSSerializer which is used to serialize a DOM tree into an XML theDocument_.
673  xercesc::DOMLSSerializer *serializer = ((xercesc::DOMImplementationLS*)saveImplementation)->createLSSerializer();
674 
675  // Make the output more human readable by inserting line feeds.
676  if (serializer->getDomConfig()->canSetParameter(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true))
677  serializer->getDomConfig()->setParameter(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true);
678 
679  // The end-of-line sequence of characters to be used in the XML being written out.
680  serializer->setNewLine(CONVERT_TO_XML("\r\n"));
681 
682  // Convert the path into Xerces compatible XMLCh*.
683  //XMLCh *tempFilePath = const_cast<XMLCh*>(CONVERT_TO_XML(filePath));
684 
685  // Specify the target for the XML output.
686  xercesc::XMLFormatTarget* formatTarget;
687  try
688  {
689  //formatTarget = new xercesc::LocalFileFormatTarget(tempFilePath);
690  formatTarget = new xercesc::LocalFileFormatTarget(filePath.c_str());
691  }
692  catch(...)
693  {
694  std::cout << __COUT_HDR_FL__<< "Inaccessible file path: " << filePath << std::endl;
695  serializer->release();
696  //xercesc::XMLString::release(&tempFilePath);
697 
698  return;
699  }
700 
701  // Create a new empty output destination object.
702  xercesc::DOMLSOutput *output = ((xercesc::DOMImplementationLS*)saveImplementation)->createLSOutput();
703 
704  // Set the stream to our target.
705  output->setByteStream(formatTarget);
706  // Write the serialized output to the destination.
707  serializer->write(theDocument_, output);
708  serializer->release();
709  //xercesc::XMLString::release(&tempFilePath);
710  delete formatTarget;
711 #else
712 
713  xercesc::DOMWriter *serializer = ((xercesc::DOMImplementationLS*)saveImplementation)->createDOMWriter();
714  serializer->setFeature(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true);
715 
716  /*
717  Choose a location for the serialized output. The 3 options are:
718  1) StdOutFormatTarget (std output stream - good for debugging)
719  2) MemBufFormatTarget (to Memory)
720  3) LocalFileFormatTarget (save to file)
721  (Note: You'll need a different header file for each one)
722  */
723  //XMLFormatTarget* pTarget = new StdOutFormatTarget();
724  // Convert the path into Xerces compatible XMLCh*.
725  XMLCh *tempFilePath = xercesc::XMLString::transcode(filePath.c_str());
726  xercesc::XMLFormatTarget* formatTarget;
727  try
728  {
729  formatTarget = new xercesc::LocalFileFormatTarget(tempFilePath);
730  }
731  catch(...)
732  {
733  std::cout << __COUT_HDR_FL__<< "Inaccessible file path: " << filePath << std::endl;
734  serializer->release();
735  xercesc::XMLString::release(&tempFilePath);
736  return;
737  }
738 
739  // Write the serialized output to the target.
740 
741  serializer->writeNode(formatTarget, *theDocument_);
742  serializer->release();
743  xercesc::XMLString::release(&tempFilePath);
744  delete formatTarget;
745 #endif
746 
747  // Cleanup.
748  //std::cout << __COUT_HDR_FL__<< "delete format target" << std::endl;
749 
750 
751 #if _XERCES_VERSION >= 30000
752 
753  //std::cout << __COUT_HDR_FL__<< "delete output0" << std::endl;
754  output->release();
755  //std::cout << __COUT_HDR_FL__<< "delete output1" << std::endl;
756 
757 #endif
758 }
759 
760 
761 //==============================================================================
762 bool XmlDocument::loadXmlDocument (std::string filePath)
763 {
764  std::cout << __COUT_HDR_FL__<< "Loading theDocument_ from file: " << filePath << std::endl;
765 
766  struct stat fileStatus;
767 
768  if(stat(filePath.c_str(), &fileStatus) != 0)
769  {
770  std::cout << __COUT_HDR_FL__<< "File not accessible." << std::endl;
771  return false;
772  }
773 
774  //reset xml platform and theDocument_
775  terminatePlatform();
776  initPlatform();
777 
778  xercesc::XercesDOMParser* parser = new xercesc::XercesDOMParser;
779  // Configure xercesc::DOM parser.
780  parser->setValidationScheme(xercesc::XercesDOMParser::Val_Auto);
781  parser->setDoNamespaces ( true );
782  parser->setDoSchema ( true );
783  parser->useCachedGrammarInParse ( false );
784 
785  try
786  {
787  parser->parse( filePath.c_str() );
788 
789  //theDocument_ memory object owned by the parent parser object
790  theDocument_ = parser->adoptDocument();//instead of getDocument() so parser will not free theDocument_ when released
791 
792  // Get the top-level element: Name is "root". No attributes for "root"
793  rootElement_ = theDocument_->getDocumentElement();
794  if( !rootElement_ )
795  throw(std::runtime_error( "empty XML theDocument_" ));
796 
797  }
798  catch( xercesc::XMLException& e )
799  {
800  std::cout << __COUT_HDR_FL__<< "Error parsing file." << std::endl;
801  return false;
802  }
803  delete parser;
804 
805  return true;
806 }
807 
808 
809 //==============================================================================
810 //XmlDocument::recursiveFixTextFields
811 // recursively replaces the content of every text node below currEl with its escapeString version
812 /*
813 void XmlDocument::recursiveFixTextFields(DOMElement *currEl)
814 {
815  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
816 
817  //recurse through children
818  for(unsigned int i = 0; i<nodeList->getLength();++i)
819  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //fix text nodes
820  ((DOMElement*)(nodeList->item(i)))->setTextContent(CONVERT_TO_XML( //change text value to escaped version
821  escapeString(XML_TO_CHAR(((DOMElement*)(nodeList->item(i)))->getNodeValue()))));
822  else
823  recursiveFixTextFields ((DOMElement*)(nodeList->item(i)));
824 }
825 */
826