otsdaq  v1_01_04
 All Classes Namespaces Functions
XmlDocument.cc
1 
2 #include "otsdaq-core/XmlUtilities/XmlDocument.h"
3 #include "otsdaq-core/XmlUtilities/ConvertToXML.h"
4 #include "otsdaq-core/XmlUtilities/ConvertFromXML.h"
5 #include "otsdaq-core/MessageFacility/MessageFacility.h"
6 #include "otsdaq-core/Macros/CoutHeaderMacros.h"
7 
8 #include <xercesc/parsers/XercesDOMParser.hpp>
9 #include <stdexcept>
10 #include <xercesc/dom/DOM.hpp>
11 #include <xercesc/dom/DOMDocument.hpp>
12 #include <xercesc/dom/DOMDocumentType.hpp>
13 #include <xercesc/dom/DOMElement.hpp>
14 #include <xercesc/dom/DOMImplementation.hpp>
15 #include <xercesc/dom/DOMImplementationRegistry.hpp>
16 #include <xercesc/dom/DOMImplementationLS.hpp>
17 //#include <xercesc/dom/DOMLSSerializer.hpp>
18 //#include <xercesc/dom/DOMLSOutput.hpp>
19 #include <xercesc/dom/DOMNodeIterator.hpp>
20 #include <xercesc/dom/DOMNodeList.hpp>
21 #include <xercesc/dom/DOMText.hpp>
22 #include <xercesc/validators/common/Grammar.hpp>
23 
24 #include <xercesc/parsers/XercesDOMParser.hpp>
25 #include <xercesc/util/XMLUni.hpp>
26 #include <xercesc/util/XercesDefs.hpp>
27 
28 #include <xercesc/util/OutOfMemoryException.hpp>
29 #include <xercesc/framework/LocalFileFormatTarget.hpp>
30 
31 #include <iostream>
32 #include <sstream>
33 #include <list>
34 
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <unistd.h>
38 #include <errno.h>
39 
40 using namespace ots;
41 
42 //==============================================================================
43 XmlDocument::XmlDocument(std::string rootName) :
44  rootTagName_(rootName)
45 {
46  INIT_MF("XmlDocument");
47  //__COUT__ << "in" << std::endl;
48  initDocument();
49  rootElement_ = theDocument_->getDocumentElement();
50  //__COUT__ << "out" << std::endl;
51 }
52 
53 //==============================================================================
54 XmlDocument::XmlDocument(const XmlDocument& doc) :
55  rootTagName_(doc.rootTagName_)
56 {
57  //__COUT__ << "in" << std::endl;
58  *this = doc;
59  //__COUT__ << "out" << std::endl;
60 }
61 
62 //==============================================================================
63 XmlDocument& XmlDocument::operator=(const XmlDocument& doc)
64 {
65  //__COUT__ << "in" << std::endl;
66  initDocument();
67  rootElement_ = theDocument_->getDocumentElement();
68  recursiveElementCopy(doc.rootElement_, rootElement_);
69  //__COUT__ << "out" << std::endl;
70  return *this;
71 }
72 
73 //==============================================================================
74 XmlDocument::~XmlDocument(void)
75 {
76  //std::cout << __COUT_HDR_FL__<< "Xml Destructor" << std::endl;
77  terminatePlatform();
78 }
79 
80 //==============================================================================
81 void XmlDocument::initDocument(void)
82 {
83  initPlatform();
84 
85  theImplementation_ = xercesc::DOMImplementationRegistry::getDOMImplementation(CONVERT_TO_XML("Core"));
86 
87  if(theImplementation_)
88  {
89  try
90  {
91  theDocument_ = theImplementation_->createDocument(
92  CONVERT_TO_XML("http://www.w3.org/2001/XMLSchema-instance"), // root element namespace URI.
93  CONVERT_TO_XML(rootTagName_), // root element name
94  0); // theDocument_ type object (DTD).
95  }
96  catch (const xercesc::OutOfMemoryException&)
97  {
98  XERCES_STD_QUALIFIER cerr << "OutOfMemoryException" << XERCES_STD_QUALIFIER endl;
99  }
100  catch (const xercesc::DOMException& e)
101  {
102  XERCES_STD_QUALIFIER cerr << "DOMException code is: " << e.code << XERCES_STD_QUALIFIER endl;
103  }
104  catch(const xercesc::XMLException& e)
105  {
106  __COUT__ << "Error Message: " << XML_TO_CHAR(e.getMessage()) << std::endl;
107  }
108  catch (...)
109  {
110  XERCES_STD_QUALIFIER cerr << "An error occurred creating the theDocument_" << XERCES_STD_QUALIFIER endl;
111  }
112  }
113  else
114  XERCES_STD_QUALIFIER cerr << "Requested theImplementation_ is not supported" << XERCES_STD_QUALIFIER endl;
115 }
116 
117 //==============================================================================
118 void XmlDocument::initPlatform(void)
119 {
120  try
121  {
122  xercesc::XMLPlatformUtils::Initialize(); // Initialize Xerces infrastructure
123  //std::cout << __COUT_HDR_FL__<< "Initialized new theDocument_" << std::endl;
124  }
125  catch( xercesc::XMLException& e )
126  {
127  __COUT__ << "XML toolkit initialization error: " << XML_TO_CHAR(e.getMessage()) << std::endl;
128  }
129 
130 }
131 
132 //==============================================================================
133 void XmlDocument::terminatePlatform(void)
134 {
135  try
136  {
137  //std::cout << __COUT_HDR_FL__<< "Releasing the document" << std::endl;
138  theDocument_->release();
139  //std::cout << __COUT_HDR_FL__<< "document released" << std::endl;
140  }
141  catch (...)
142  {
143  XERCES_STD_QUALIFIER cerr << "An error occurred destroying the theDocument_" << XERCES_STD_QUALIFIER endl;
144  }
145 
146  try
147  {
148  xercesc::XMLPlatformUtils::Terminate(); // Terminate after release of memory
149  }
150  catch( xercesc::XMLException& e )
151  {
152  __COUT__ << "XML toolkit teardown error: " << XML_TO_CHAR(e.getMessage()) << std::endl;
153  //XMLString::release(&message);
154  }
155 }
156 
157 //==============================================================================
158 //addTextElementToParent
159 // add to parent by pointer to parent
160 // returns pointer to element that is added
161 xercesc::DOMElement* XmlDocument::addTextElementToParent(std::string childName, std::string childText, xercesc::DOMElement* parent)
162 {
163  if(parent == 0) return 0;
164  xercesc::DOMElement* child;
165  try
166  {
167  child = theDocument_->createElement(CONVERT_TO_XML(childName));
168  }
169  catch (xercesc::DOMException& e)
170  {
171  __COUT__ << "Can't use the name: " << childName << " to create the child element because the exception says: "
172  << XML_TO_CHAR(e.getMessage()) << ". Very likely you have a name that starts with a number and that's not allowed!" << std::endl;
173  }
174  parent->appendChild(child);
175 
176  try
177  {
178  child->appendChild(theDocument_->createTextNode(CONVERT_TO_XML(childText)));
179  }
180  catch(...) //sometimes see TranscodingException
181  {
182  __COUT_ERR__ << "Error caught attempting to create a text node for this text: " <<
183  childText << ". Converting instead to 'Illegal text..'" << std::endl;
184  child->appendChild(theDocument_->createTextNode(CONVERT_TO_XML("Illegal text content blocked.")));
185  }
186 
187  return child;
188 }
189 
190 //==============================================================================
191 //addTextElementToParent
192 // add to parent by instance number of parent name
193 // returns pointer to element that is added
194 xercesc::DOMElement* XmlDocument::addTextElementToParent(std::string childName, std::string childText, std::string parentName, unsigned int parentIndex)
195 {
196  xercesc::DOMNodeList* nodeList = theDocument_->getElementsByTagName(CONVERT_TO_XML(parentName));
197 
198  if(parentIndex >= nodeList->getLength())
199  {
200  __COUT__ << "WARNING: Illegal parent index attempted in tags with name: " << parentName << ", index: " << parentIndex << std::endl;
201  return 0; //illegal index attempted
202  }
203 
204  return addTextElementToParent(childName, childText,(xercesc::DOMElement*)(nodeList->item(parentIndex)));
205 }
206 
207 //==============================================================================
208 void XmlDocument::copyDocument(const xercesc::DOMDocument* toCopy, xercesc::DOMDocument* copy)
209 {
210  recursiveElementCopy(toCopy->getDocumentElement(),copy->getDocumentElement());
211 }
212 
213 //==============================================================================
214 void XmlDocument::recursiveElementCopy(const xercesc::DOMElement* toCopy, xercesc::DOMElement* copy)
215 {
216  xercesc::DOMNodeList* nodeListToCopy = toCopy->getChildNodes(); //get all children of the list to copy
217  xercesc::DOMNode* iNode;
218  xercesc::DOMDocument* copyDocument = copy->getOwnerDocument();
219  for(unsigned int i=0; i<nodeListToCopy->getLength(); i++)
220  {
221  iNode = nodeListToCopy->item(i);
222  xercesc::DOMElement* child = copyDocument->createElement(iNode->getNodeName());
223  copy->appendChild(child);
224  if( iNode->getFirstChild() != 0 && iNode->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute
225  {
226  child->appendChild(copyDocument->createTextNode(child->getFirstChild()->getNodeValue()));
227  }
228  recursiveElementCopy((xercesc::DOMElement*)(iNode),child);
229  }
230 }
231 
232 //==============================================================================
233 //XmlDocument::addElementToParent
234 // Add field/value element to XML doc at parent
235 // On Success, The child index of the added element with respect to the parent is returned and can be used to add
236 // children to the new element
237 // On Failure, return -1
238 /*
239 unsigned int XmlDocument::addElementToParent(std::string field, std::string value, xercesc::DOMElement *parentEl, bool verbose)
240 {
241  DOMNodeList *nodeList = parentEl->getChildNodes(); //get all children
242 
243  if(verbose)
244  {
245  //display parent info
246  //std::cout << __COUT_HDR_FL__<< "Parent Name: " << XML_TO_CHAR(parentEl->getNodeName()) << " Field: " << field << " Value: " << value << std::endl;
247  if( parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE)
248  std::cout << __COUT_HDR_FL__<< "Parent's First Child Node Value: " << XML_TO_CHAR(parentEl->getFirstChild()->getNodeValue()) << std::endl;
249  }
250 
251  //add field/value element
252  DOMElement *newEl = theDocument_->createElement(CONVERT_TO_XML(field));
253  parentEl->appendChild(newEl);
254 
255  DOMText* valueStr = theDocument_->createTextNode(CONVERT_TO_XML(value));
256  newEl->appendChild(valueStr);
257 
258  if( parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE)
259  return nodeList->getLength() - 2; //return child index among parent's children, not counting first child text node
260  return nodeList->getLength() - 1; //return child index among parent's children
261 }
262 */
263 //==============================================================================
264 //XmlDocument::addDataElement
265 // Add field/value element to XML doc at parent which is returned from getElementsByTagName(parentName), entry number parentNameIndex
266 // On Success, The child index of the added element with respect to the parent is returned and can be used to add
267 // children to the new element
268 // On Failure, return -1
269 /*
270 unsigned int XmlDocument::addDataElement ( std::string field, std::string value, std::string parentName, unsigned int parentNameIndex)
271 {
272  DOMNodeList *nodeList = theDocument_->getElementsByTagName(CONVERT_TO_XML(parentName));
273 
274  if(parentNameIndex >= nodeList->getLength()) {
275  __COUT__ << "illegal parent index attempted in tags with name: " << parentName << ", index: " << parentNameIndex << std::endl;
276  return -1; //illegal index attempted
277  }
278 
279  return addElementToParent(field,value,(DOMElement*)(nodeList->item(parentNameIndex)));
280 }
281 */
282 //==============================================================================
283 //XmlDocument::addDataElement
284 // Add field/value element to XML doc at parentIndexArray (with depth of parent indicated by parentIndexArraySize)
285 // If parentIndexArray = NULL, element is added with <DATA> parent
286 // otherwise, parentIndexArray indicates the parent within the node list for <DATA> where
287 // the element will be added
288 // On Success, The child index of the added element with respect to the parent is returned and can be used to add
289 // children to the new element
290 // On Failure, return -1
291 /*
292 unsigned int XmlDocument::addDataElement ( std::string field, std::string value, unsigned int *parentIndexArray, unsigned int parentIndexArraySize)
293 {
294 
295  //__COUT__ << "field: " << field << ", value: " << value << ", parent: " << parentIndexArraySize << std::endl;
296 
297  DOMElement *parentEl = dataElement; // initialize parent to <DATA>
298 
299  if(parentIndexArray) //if there passed an array find parent relative to data element
300  {
301  //std::cout << __COUT_HDR_FL__<< "Using Parent Index Array" << std::endl;
302 
303  DOMNodeList *nodeList;
304 
305  //iterate through nested parents based on parentIndexArray
306  unsigned int tmpi,cntNotTxt;
307  for(unsigned int i=0;i<parentIndexArraySize;++i)
308  {
309  nodeList = parentEl->getChildNodes(); //get all children
310  cntNotTxt = 0;
311 
312  //get cntNotTxt to proper non text node
313  for(tmpi=0;tmpi<nodeList->getLength();++tmpi)
314  {
315  if(((DOMElement*)(nodeList->item(tmpi)))->getNodeType() == DOMNode::TEXT_NODE) continue; //skip text nodes
316 
317  if(cntNotTxt == parentIndexArray[i]) break; //at proper parent node!
318  ++cntNotTxt; //else look for next
319  }
320 
321  //in theory, only first child can be text - ignore text node children
322  //if(parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) ++tmpi;
323 
324  if(tmpi >= nodeList->getLength()) {
325  __COUT__ << "illegal child index attempted in nested parents: " << parentIndexArray[i] << ", depth: " << i << ", tmpi: " << tmpi << std::endl;
326  return -1; //illegal child index attempted in nested parents
327  }
328 
329  parentEl = (DOMElement*)(nodeList->item(tmpi));
330  }
331  }
332 
333  return addElementToParent(field,value,parentEl);
334 }
335 */
336 //==============================================================================
337 //XmlDocument::addXmlData
338 // Append <DATA> from xmldoc to this XML doc
339 // On Success, The child index within <DATA> of the first element is returned
340 // On Failure, return -1
341 /*
342 unsigned int XmlDocument::addXmlData (XmlDocument *xmldoc)
343 {
344  //
345 
346  int retIndex = dataElement->getChildNodes()->getLength(); //will be index of first appended data element
347 
348  //add all first level child elements of data and recurse on them
349  DOMNodeList *nodeList = xmldoc->dataElement->getChildNodes(); //get all children within data
350  for(unsigned int i = 0; i<nodeList->getLength();++i)
351  {
352  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //ignore text node children
353  continue;
354 
355  recursiveAddElementToParent((DOMElement*)(nodeList->item(i)),dataElement);
356  }
357 
358  return retIndex;
359 }
360 */
361 //==============================================================================
362 //XmlDocument::recursiveAddElementToParent
363 // add currEl and its children tree to parentEl
364 /*
365 void XmlDocument::recursiveAddElementToParent (DOMElement *currEl, DOMElement *parentEl)
366 {
367 std::string field, value = "";
368 
369  //char *tmpField =
370  field = XML_TO_CHAR(currEl->getNodeName());//XML_TO_CHAR(currEl->getNodeName());
371  //field = tmpField;
372  //XMLString::release( &tmpField );
373 
374  if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute
375  value = escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
376 
377  //insert currEl
378  addElementToParent(field,value,parentEl);
379 
380  //insert rest of currEl tree
381  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children of currEl
382  for(unsigned int i = 0; i<nodeList->getLength();++i)
383  {
384  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //ignore text node children
385  continue;
386 
387  recursiveAddElementToParent((DOMElement*)(nodeList->item(i)),currEl);
388  }
389 }
390 */
391 //==============================================================================
392 //XmlDocument::outputXmlDocument
393 // recurse through XML theDocument_ and std out and output to stream parameter if not null
394 void XmlDocument::outputXmlDocument (std::ostringstream *out, bool dispStdOut)
395 {
396  recursiveOutputXmlDocument(theDocument_->getDocumentElement(),out,dispStdOut);
397 }
398 
399 //==============================================================================
400 //XmlDocument::recursiveOutputXmlDocument
401 // recursively printout XML theDocument_ to std out and output stream if not null
402 void XmlDocument::recursiveOutputXmlDocument (xercesc::DOMElement *currEl, std::ostringstream *out, bool dispStdOut, std::string tabStr)
403 {
404  //open field tag
405  if(dispStdOut) std::cout << __COUT_HDR_FL__<< tabStr << "<" << XML_TO_CHAR(currEl->getNodeName()) ;
406  if(out) *out << tabStr << "<" << XML_TO_CHAR(currEl->getNodeName());
407 
408  //insert value if text node child
409  if( currEl->getFirstChild() != NULL &&
410  currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute
411  {
412  if(dispStdOut) std::cout << " value='" << (XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())) << "'";
413  if(out) *out << " value='" << (XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())) << "'";
414  }
415 
416  xercesc::DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
417 
418  //close opening field tag
419  if(dispStdOut) std::cout << ((nodeList->getLength() == 0 ||
420  (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))? "/":"")
421  << ">" << " len:" << nodeList->getLength() << std::endl;
422  if(out) *out << ((nodeList->getLength() == 0 ||
423  (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))? "/":"")
424  << ">" << std::endl;
425 
426  //insert children
427  std::string newTabStr = tabStr + "\t";
428  for(unsigned int i = 0; i<nodeList->getLength();++i)
429  if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE) //ignore text node children
430  recursiveOutputXmlDocument ((xercesc::DOMElement*)(nodeList->item(i)),out,dispStdOut,newTabStr);
431 
432  //close tag if children
433  if(nodeList->getLength() > 1 || (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() != xercesc::DOMNode::TEXT_NODE))
434  {
435  if(dispStdOut) std::cout << __COUT_HDR_FL__<< tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
436  if(out) *out << tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
437  }
438 }
439 
440 //==============================================================================
441 //XmlDocument::getDataElement
442 // returns the value for field found occurance number of times
443 // returns empty std::string "" if field was not found
444 /*
445 std::string XmlDocument::getDataElement (const std::string field, const unsigned int occurance)
446 {
447  unsigned int count = 0;
448  return recursiveFindElement(theDocument_->getDocumentElement(),field,occurance,count);
449 }
450 */
451 //==============================================================================
452 //XmlDocument::recursiveFindElement
453 // recursively searches and returns the value for field found occurance number of times
454 /*
455 std::string XmlDocument::recursiveFindElement (DOMElement *currEl, const std::string field, const unsigned int occurance, unsigned int &count)
456 {
457  if (XML_TO_CHAR(currEl->getNodeName()) == field && occurance == count++) //found, done!!
458  {
459  if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) //if has a text node first, return as value attribute
460  return escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
461  else
462  return ""; //empty value attribute
463  }
464 
465  std::string retStr;
466  //look through children recursively
467  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
468  for(unsigned int i = 0; i<nodeList->getLength();++i)
469  if(nodeList->item(i)->getNodeType() != DOMNode::TEXT_NODE) //ignore text node children
470  {
471  retStr = recursiveFindElement ((DOMElement*)(nodeList->item(i)),field,occurance,count);
472  if(retStr != "") return retStr; //found among children already, done
473  //else continue search within children recursively
474  }
475  return ""; //nothing found
476 }
477 */
478 //==============================================================================
479 //XmlDocument::getAllDataElements
480 // returns all of the values found for the field in a vector
481 // if none found vector will have size 0
482 /*
483 std::vector<std::string> XmlDocument::getAllDataElements (std::string field)
484 {
485  vector<string> retVec;
486 
487  recursiveFindAllElements(theDocument_->getDocumentElement(),field,&retVec);
488 
489  return retVec;
490 }
491 */
492 //==============================================================================
//XmlDocument::recursiveFindAllElements
//	recursively searches for field and appends every value found to retVec
495 /*
496 void XmlDocument::recursiveFindAllElements (DOMElement *currEl, const std::string field,std::vector<std::string> *retVec)
497 {
498  if (XML_TO_CHAR(currEl->getNodeName()) == field &&
499  currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) //if has a text node first, return as value attribute
500  retVec->push_back(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
501 
502 
503  //look through children recursively
504  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
505  for(unsigned int i = 0; i<nodeList->getLength();++i)
506  if(nodeList->item(i)->getNodeType() != DOMNode::TEXT_NODE) //ignore text node children
507  recursiveFindAllElements ((DOMElement*)(nodeList->item(i)),field,retVec);
508 }
509 */
510 //==============================================================================
511 //XmlDocument::escapeString
512 // convert quotes to html quote characters &apos; = ' and &quot; = "
513 // remove new line characters
514 // and (if !allowWhiteSpace) remove white space (so that read from file white space artifact removed)
515 //
516 // convert &amp; = &
517 // if(allowWhiteSpace) convert \t to 8 &#160; spaces and \n to <br>
518 std::string XmlDocument::escapeString(std::string inString, bool allowWhiteSpace)
519 {
520  bool doit = false;
521 
522  unsigned int ws = -1;
523  char htmlTmp[6];
524 
525  for(unsigned int i=0; i<inString.length(); i++)
526  if(inString[i] != ' ')
527  {
528  if(doit) std::cout << __COUT_HDR_FL__<< inString[i] << ":" <<
529  (int)inString[i] << ":" << inString << std::endl;
530 
531  //remove new lines and unprintable characters
532  if(inString[i] == '\r' || inString[i] == '\n' ||//remove new line chars
533  inString[i] == '\t' || //remove tabs
534  inString[i] < 32 || //remove un-printable characters (they mess up xml interpretation)
535  (inString[i] > char(126) && inString[i] < char(161))) //this is aggravated by the bug in MFextensions (though Eric says he fixed on 8/24/2016)
536  //Note: greater than 255 should be impossible if by byte (but there are html chracters in 300s and 8000s)
537  {
538  if(//maintain new lines and tabs
539  inString[i] == '\n')
540  {
541  if(allowWhiteSpace)
542  {
543  sprintf(htmlTmp,"&#%3.3d",inString[i]);
544  inString.insert(i,htmlTmp); //insert html str sequence
545  inString.replace(i+5,1,1,';'); // replace special character with ;
546  i+=6; //skip to next char to check
547  --i;
548  }
549  else //translate to ' '
550  inString[i] = ' ';
551  }
552  else if(//maintain new lines and tabs
553  inString[i] == '\t')
554  {
555  if(allowWhiteSpace)
556  {
557  if(0)
558  {
559  //tab = 8 spaces
560  sprintf(htmlTmp,"&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160");
561  inString.insert(i,htmlTmp); //insert html str sequence
562  inString.replace(i+47,1,1,';'); // replace special character with ;
563  i+=48; //skip to next char to check
564  --i;
565  }
566  else //tab = 0x09
567  {
568 
569  sprintf(htmlTmp,"&#009");
570  inString.insert(i,htmlTmp); //insert html str sequence
571  inString.replace(i+5,1,1,';'); // replace special character with ;
572  i+=6; //skip to next char to check
573  --i;
574  }
575  }
576  else //translate to ' '
577  inString[i] = ' ';
578  }
579  else
580  {
581  inString.erase(i,1); //erase character
582  --i; //step back so next char to check is correct
583  }
584  if(doit) std::cout << __COUT_HDR_FL__<< inString << std::endl;
585  continue;
586  }
587 
588  if(doit) std::cout << __COUT_HDR_FL__<< inString << std::endl;
589 
590  //replace special characters
591  if(inString[i] == '\"' || inString[i] == '\'')
592  {
593  inString.insert(i,(inString[i] == '\'')?"&apos":"&quot"); //insert HTML name before quotes
594  inString.replace(i+5,1,1,';'); // replace special character with ;
595  i+=5; //skip to next char to check
596  //std::cout << __COUT_HDR_FL__<< inString << std::endl;
597  }
598  else if(inString[i] == '&')
599  {
600  inString.insert(i,"&amp"); //insert HTML name before special character
601  inString.replace(i+4,1,1,';'); // replace special character with ;
602  i+=4; //skip to next char to check
603  }
604  else if(inString[i] == '<' || inString[i] == '>')
605  {
606  inString.insert(i,(inString[i] == '<')?"&lt":"&gt"); //insert HTML name before special character
607  inString.replace(i+3,1,1,';'); // replace special character with ;
608  i+=3; //skip to next char to check
609  }
610  else if(inString[i] >= char(161) && inString[i] <= char(255)) //printable special characters
611  {
612  sprintf(htmlTmp,"&#%3.3d",inString[i]);
613  inString.insert(i,htmlTmp); //insert html number sequence
614  inString.replace(i+5,1,1,';'); // replace special character with ;
615  i+=5; //skip to next char to check
616  }
617 
618  if(doit) std::cout << __COUT_HDR_FL__<< inString << std::endl;
619 
620  ws = i; //last non white space char
621  }
622  else if(allowWhiteSpace) //keep white space if allowed
623  {
624  if(i-1 == ws) continue; //dont do anything for first white space
625 
626  //for second white space add 2, and 1 from then
627  if(i-2 == ws)
628  {
629  inString.insert(i,"&#160;"); //insert html space
630  i+=6; //skip to point at space again
631  }
632  inString.insert(i,"&#160"); //insert html space
633  inString.replace(i+5,1,1,';'); // replace special character with ;
634  i+=5; //skip to next char to check
635  ws = i;
636  }
637 
638  if(doit) std::cout << __COUT_HDR_FL__<< inString.size() << " " << ws << std::endl;
639 
640  inString.substr(0,ws+1);
641 
642  if(doit) std::cout << __COUT_HDR_FL__<< inString.size() << " " << inString << std::endl;
643 
644  if(ws == (unsigned int)-1) return ""; //empty std::string since all white space
645  return inString.substr(0,ws+1); //trim right white space
646 }
647 
648 
649 //==============================================================================
650 //XmlDocument::recursiveRemoveChild
651 // remove child and all of child's sub-tree from parent
652 void XmlDocument::recursiveRemoveChild(xercesc::DOMElement *childEl, xercesc::DOMElement *parentEl)
653 {
654  //release child's children first
655  xercesc::DOMNodeList* nodeList = childEl->getChildNodes(); //get all children within data
656  for(unsigned int i = 0; i<nodeList->getLength(); ++i)
657  recursiveRemoveChild((xercesc::DOMElement*)(nodeList->item(nodeList->getLength()-1-i)),childEl);
658 
659  //then release child
660  parentEl->removeChild(childEl);
661  childEl->release();
662 }
663 
664 //==============================================================================
665 //XmlDocument::saveXmlDocument
666 // wrapper for private outputXML
667 // Warning: filePath must be accessible or program will crash!
668 void XmlDocument::saveXmlDocument (std::string filePath)
669 {
670  std::cout << __COUT_HDR_FL__<< "Saving theDocument_ to file: " << filePath << std::endl;
671  //Return the first registered theImplementation_ that has the desired features. In this case, we are after a DOM theImplementation_ that has the LS feature... or Load/Save.
672  //DOMImplementation *theImplementation_ = DOMImplementationRegistry::getDOMImplementation(L"LS");
673  xercesc::DOMImplementation *saveImplementation = xercesc::DOMImplementationRegistry::getDOMImplementation(CONVERT_TO_XML("LS"));
674 
675  std::cout << __COUT_HDR_FL__<< "XERCES Version: " << _XERCES_VERSION << std::endl;
676 
677 #if _XERCES_VERSION >= 30000
678 
679  //std::cout << __COUT_HDR_FL__<< "making file" << filePath << std::endl;
680  // Create a DOMLSSerializer which is used to serialize a DOM tree into an XML theDocument_.
681  xercesc::DOMLSSerializer *serializer = ((xercesc::DOMImplementationLS*)saveImplementation)->createLSSerializer();
682 
683  // Make the output more human readable by inserting line feeds.
684  if (serializer->getDomConfig()->canSetParameter(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true))
685  serializer->getDomConfig()->setParameter(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true);
686 
687  // The end-of-line sequence of characters to be used in the XML being written out.
688  serializer->setNewLine(CONVERT_TO_XML("\r\n"));
689 
690  // Convert the path into Xerces compatible XMLCh*.
691  //XMLCh *tempFilePath = const_cast<XMLCh*>(CONVERT_TO_XML(filePath));
692 
693  // Specify the target for the XML output.
694  xercesc::XMLFormatTarget* formatTarget;
695  try
696  {
697  //formatTarget = new xercesc::LocalFileFormatTarget(tempFilePath);
698  formatTarget = new xercesc::LocalFileFormatTarget(filePath.c_str());
699  }
700  catch(...)
701  {
702  std::cout << __COUT_HDR_FL__<< "Inaccessible file path: " << filePath << std::endl;
703  serializer->release();
704  //xercesc::XMLString::release(&tempFilePath);
705 
706  return;
707  }
708 
709  // Create a new empty output destination object.
710  xercesc::DOMLSOutput *output = ((xercesc::DOMImplementationLS*)saveImplementation)->createLSOutput();
711 
712  // Set the stream to our target.
713  output->setByteStream(formatTarget);
714  // Write the serialized output to the destination.
715  serializer->write(theDocument_, output);
716  serializer->release();
717  //xercesc::XMLString::release(&tempFilePath);
718  delete formatTarget;
719 #else
720 
721  xercesc::DOMWriter *serializer = ((xercesc::DOMImplementationLS*)saveImplementation)->createDOMWriter();
722  serializer->setFeature(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true);
723 
724  /*
725  Choose a location for the serialized output. The 3 options are:
726  1) StdOutFormatTarget (std output stream - good for debugging)
727  2) MemBufFormatTarget (to Memory)
728  3) LocalFileFormatTarget (save to file)
729  (Note: You'll need a different header file for each one)
730  */
731  //XMLFormatTarget* pTarget = new StdOutFormatTarget();
732  // Convert the path into Xerces compatible XMLCh*.
733  XMLCh *tempFilePath = xercesc::XMLString::transcode(filePath.c_str());
734  xercesc::XMLFormatTarget* formatTarget;
735  try
736  {
737  formatTarget = new xercesc::LocalFileFormatTarget(tempFilePath);
738  }
739  catch(...)
740  {
741  std::cout << __COUT_HDR_FL__<< "Inaccessible file path: " << filePath << std::endl;
742  serializer->release();
743  xercesc::XMLString::release(&tempFilePath);
744  return;
745  }
746 
747  // Write the serialized output to the target.
748 
749  serializer->writeNode(formatTarget, *theDocument_);
750  serializer->release();
751  xercesc::XMLString::release(&tempFilePath);
752  delete formatTarget;
753 #endif
754 
755  // Cleanup.
756  //std::cout << __COUT_HDR_FL__<< "delete format target" << std::endl;
757 
758 
759 #if _XERCES_VERSION >= 30000
760 
761  //std::cout << __COUT_HDR_FL__<< "delete output0" << std::endl;
762  output->release();
763  //std::cout << __COUT_HDR_FL__<< "delete output1" << std::endl;
764 
765 #endif
766 }
767 
768 
769 //==============================================================================
770 bool XmlDocument::loadXmlDocument (std::string filePath)
771 {
772  std::cout << __COUT_HDR_FL__<< "Loading theDocument_ from file: " << filePath << std::endl;
773 
774  struct stat fileStatus;
775 
776  if(stat(filePath.c_str(), &fileStatus) != 0)
777  {
778  std::cout << __COUT_HDR_FL__<< "File not accessible." << std::endl;
779  return false;
780  }
781 
782  //reset xml platform and theDocument_
783  terminatePlatform();
784  initPlatform();
785 
786  xercesc::XercesDOMParser* parser = new xercesc::XercesDOMParser;
787  // Configure xercesc::DOM parser.
788  parser->setValidationScheme(xercesc::XercesDOMParser::Val_Auto);
789  parser->setDoNamespaces ( true );
790  parser->setDoSchema ( true );
791  parser->useCachedGrammarInParse ( false );
792 
793  try
794  {
795  parser->parse( filePath.c_str() );
796 
797  //theDocument_ memory object owned by the parent parser object
798  theDocument_ = parser->adoptDocument();//instead of getDocument() so parser will not free theDocument_ when released
799 
800  // Get the top-level element: Name is "root". No attributes for "root"
801  rootElement_ = theDocument_->getDocumentElement();
802  if( !rootElement_ )
803  throw(std::runtime_error( "empty XML theDocument_" ));
804 
805  }
806  catch( xercesc::XMLException& e )
807  {
808  std::cout << __COUT_HDR_FL__<< "Error parsing file." << std::endl;
809  return false;
810  }
811  delete parser;
812 
813  return true;
814 }
815 
816 
817 //==============================================================================
//XmlDocument::recursiveFixTextFields
//	recursively replace the value of every text node below currEl with its escaped version (see escapeString)
820 /*
821 void XmlDocument::recursiveFixTextFields(DOMElement *currEl)
822 {
823  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
824 
825  //recurse through children
826  for(unsigned int i = 0; i<nodeList->getLength();++i)
827  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //fix text nodes
828  ((DOMElement*)(nodeList->item(i)))->setTextContent(CONVERT_TO_XML( //change text value to escaped version
829  escapeString(XML_TO_CHAR(((DOMElement*)(nodeList->item(i)))->getNodeValue()))));
830  else
831  recursiveFixTextFields ((DOMElement*)(nodeList->item(i)));
832 }
833 */
834