otsdaq  v1_01_02
 All Classes Namespaces Functions
XmlDocument.cc
1 
2 #include "otsdaq-core/XmlUtilities/XmlDocument.h"
3 #include "otsdaq-core/XmlUtilities/ConvertToXML.h"
4 #include "otsdaq-core/XmlUtilities/ConvertFromXML.h"
5 #include "otsdaq-core/MessageFacility/MessageFacility.h"
6 #include "otsdaq-core/Macros/CoutHeaderMacros.h"
7 
8 #include <xercesc/parsers/XercesDOMParser.hpp>
9 #include <stdexcept>
10 #include <xercesc/dom/DOM.hpp>
11 #include <xercesc/dom/DOMDocument.hpp>
12 #include <xercesc/dom/DOMDocumentType.hpp>
13 #include <xercesc/dom/DOMElement.hpp>
14 #include <xercesc/dom/DOMImplementation.hpp>
15 #include <xercesc/dom/DOMImplementationRegistry.hpp>
16 #include <xercesc/dom/DOMImplementationLS.hpp>
17 //#include <xercesc/dom/DOMLSSerializer.hpp>
18 //#include <xercesc/dom/DOMLSOutput.hpp>
19 #include <xercesc/dom/DOMNodeIterator.hpp>
20 #include <xercesc/dom/DOMNodeList.hpp>
21 #include <xercesc/dom/DOMText.hpp>
22 #include <xercesc/validators/common/Grammar.hpp>
23 
24 #include <xercesc/parsers/XercesDOMParser.hpp>
25 #include <xercesc/util/XMLUni.hpp>
26 #include <xercesc/util/XercesDefs.hpp>
27 
28 #include <xercesc/util/OutOfMemoryException.hpp>
29 #include <xercesc/framework/LocalFileFormatTarget.hpp>
30 
31 #include <iostream>
32 #include <sstream>
33 #include <list>
34 
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <unistd.h>
38 #include <errno.h>
39 
40 using namespace ots;
41 
42 //==============================================================================
43 XmlDocument::XmlDocument(std::string rootName) :
44  rootTagName_(rootName)
45 {
46  INIT_MF("XmlDocument");
47  //__MOUT__ << "in" << std::endl;
48  initDocument();
49  rootElement_ = theDocument_->getDocumentElement();
50  //__MOUT__ << "out" << std::endl;
51 }
52 
53 //==============================================================================
54 XmlDocument::XmlDocument(const XmlDocument& doc) :
55  rootTagName_(doc.rootTagName_)
56 {
57  //__MOUT__ << "in" << std::endl;
58  *this = doc;
59  //__MOUT__ << "out" << std::endl;
60 }
61 
62 //==============================================================================
63 XmlDocument& XmlDocument::operator=(const XmlDocument& doc)
64 {
65  //__MOUT__ << "in" << std::endl;
66  initDocument();
67  rootElement_ = theDocument_->getDocumentElement();
68  recursiveElementCopy(doc.rootElement_, rootElement_);
69  //__MOUT__ << "out" << std::endl;
70  return *this;
71 }
72 
73 //==============================================================================
74 XmlDocument::~XmlDocument(void)
75 {
76  //std::cout << __COUT_HDR_FL__<< "Xml Destructor" << std::endl;
77  terminatePlatform();
78 }
79 
80 //==============================================================================
81 void XmlDocument::initDocument(void)
82 {
83  initPlatform();
84 
85  theImplementation_ = xercesc::DOMImplementationRegistry::getDOMImplementation(CONVERT_TO_XML("Core"));
86 
87  if(theImplementation_)
88  {
89  try
90  {
91  theDocument_ = theImplementation_->createDocument(
92  CONVERT_TO_XML("http://www.w3.org/2001/XMLSchema-instance"), // root element namespace URI.
93  CONVERT_TO_XML(rootTagName_), // root element name
94  0); // theDocument_ type object (DTD).
95  }
96  catch (const xercesc::OutOfMemoryException&)
97  {
98  XERCES_STD_QUALIFIER cerr << "OutOfMemoryException" << XERCES_STD_QUALIFIER endl;
99  }
100  catch (const xercesc::DOMException& e)
101  {
102  XERCES_STD_QUALIFIER cerr << "DOMException code is: " << e.code << XERCES_STD_QUALIFIER endl;
103  }
104  catch(const xercesc::XMLException& e)
105  {
106  __MOUT__ << "Error Message: " << XML_TO_CHAR(e.getMessage()) << std::endl;
107  }
108  catch (...)
109  {
110  XERCES_STD_QUALIFIER cerr << "An error occurred creating the theDocument_" << XERCES_STD_QUALIFIER endl;
111  }
112  }
113  else
114  XERCES_STD_QUALIFIER cerr << "Requested theImplementation_ is not supported" << XERCES_STD_QUALIFIER endl;
115 }
116 
117 //==============================================================================
118 void XmlDocument::initPlatform(void)
119 {
120  try
121  {
122  xercesc::XMLPlatformUtils::Initialize(); // Initialize Xerces infrastructure
123  //std::cout << __COUT_HDR_FL__<< "Initialized new theDocument_" << std::endl;
124  }
125  catch( xercesc::XMLException& e )
126  {
127  __MOUT__ << "XML toolkit initialization error: " << XML_TO_CHAR(e.getMessage()) << std::endl;
128  }
129 
130 }
131 
132 //==============================================================================
133 void XmlDocument::terminatePlatform(void)
134 {
135  try
136  {
137  //std::cout << __COUT_HDR_FL__<< "Releasing the document" << std::endl;
138  theDocument_->release();
139  //std::cout << __COUT_HDR_FL__<< "document released" << std::endl;
140  }
141  catch (...)
142  {
143  XERCES_STD_QUALIFIER cerr << "An error occurred destroying the theDocument_" << XERCES_STD_QUALIFIER endl;
144  }
145 
146  try
147  {
148  xercesc::XMLPlatformUtils::Terminate(); // Terminate after release of memory
149  }
150  catch( xercesc::XMLException& e )
151  {
152  __MOUT__ << "XML toolkit teardown error: " << XML_TO_CHAR(e.getMessage()) << std::endl;
153  //XMLString::release(&message);
154  }
155 }
156 
157 //==============================================================================
158 //addTextElementToParent
159 // add to parent by pointer to parent
160 // returns pointer to element that is added
161 xercesc::DOMElement* XmlDocument::addTextElementToParent(std::string childName, std::string childText, xercesc::DOMElement* parent)
162 {
163  if(parent == 0) return 0;
164  xercesc::DOMElement* child;
165  try
166  {
167  child = theDocument_->createElement(CONVERT_TO_XML(childName));
168  }
169  catch (xercesc::DOMException& e)
170  {
171  __MOUT__ << "Can't use the name: " << childName << " to create the child element because the exception says: "
172  << XML_TO_CHAR(e.getMessage()) << ". Very likely you have a name that starts with a number and that's not allowed!" << std::endl;
173  }
174  parent->appendChild(child);
175  child->appendChild(theDocument_->createTextNode(CONVERT_TO_XML(childText)));
176 
177  return child;
178 }
179 
180 //==============================================================================
181 //addTextElementToParent
182 // add to parent by instance number of parent name
183 // returns pointer to element that is added
184 xercesc::DOMElement* XmlDocument::addTextElementToParent(std::string childName, std::string childText, std::string parentName, unsigned int parentIndex)
185 {
186  xercesc::DOMNodeList* nodeList = theDocument_->getElementsByTagName(CONVERT_TO_XML(parentName));
187 
188  if(parentIndex >= nodeList->getLength())
189  {
190  __MOUT__ << "WARNING: Illegal parent index attempted in tags with name: " << parentName << ", index: " << parentIndex << std::endl;
191  return 0; //illegal index attempted
192  }
193 
194  return addTextElementToParent(childName, childText,(xercesc::DOMElement*)(nodeList->item(parentIndex)));
195 }
196 
197 //==============================================================================
198 void XmlDocument::copyDocument(const xercesc::DOMDocument* toCopy, xercesc::DOMDocument* copy)
199 {
200  recursiveElementCopy(toCopy->getDocumentElement(),copy->getDocumentElement());
201 }
202 
203 //==============================================================================
204 void XmlDocument::recursiveElementCopy(const xercesc::DOMElement* toCopy, xercesc::DOMElement* copy)
205 {
206  xercesc::DOMNodeList* nodeListToCopy = toCopy->getChildNodes(); //get all children of the list to copy
207  xercesc::DOMNode* iNode;
208  xercesc::DOMDocument* copyDocument = copy->getOwnerDocument();
209  for(unsigned int i=0; i<nodeListToCopy->getLength(); i++)
210  {
211  iNode = nodeListToCopy->item(i);
212  xercesc::DOMElement* child = copyDocument->createElement(iNode->getNodeName());
213  copy->appendChild(child);
214  if( iNode->getFirstChild() != 0 && iNode->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute
215  {
216  child->appendChild(copyDocument->createTextNode(child->getFirstChild()->getNodeValue()));
217  }
218  recursiveElementCopy((xercesc::DOMElement*)(iNode),child);
219  }
220 }
221 
222 //==============================================================================
223 //XmlDocument::addElementToParent
224 // Add field/value element to XML doc at parent
225 // On Success, The child index of the added element with respect to the parent is returned and can be used to add
226 // children to the new element
227 // On Failure, return -1
228 /*
229 unsigned int XmlDocument::addElementToParent(std::string field, std::string value, xercesc::DOMElement *parentEl, bool verbose)
230 {
231  DOMNodeList *nodeList = parentEl->getChildNodes(); //get all children
232 
233  if(verbose)
234  {
235  //display parent info
236  //std::cout << __COUT_HDR_FL__<< "Parent Name: " << XML_TO_CHAR(parentEl->getNodeName()) << " Field: " << field << " Value: " << value << std::endl;
237  if( parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE)
238  std::cout << __COUT_HDR_FL__<< "Parent's First Child Node Value: " << XML_TO_CHAR(parentEl->getFirstChild()->getNodeValue()) << std::endl;
239  }
240 
241  //add field/value element
242  DOMElement *newEl = theDocument_->createElement(CONVERT_TO_XML(field));
243  parentEl->appendChild(newEl);
244 
245  DOMText* valueStr = theDocument_->createTextNode(CONVERT_TO_XML(value));
246  newEl->appendChild(valueStr);
247 
248  if( parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE)
249  return nodeList->getLength() - 2; //return child index among parent's children, not counting first child text node
250  return nodeList->getLength() - 1; //return child index among parent's children
251 }
252 */
253 //==============================================================================
254 //XmlDocument::addDataElement
255 // Add field/value element to XML doc at parent which is returned from getElementsByTagName(parentName), entry number parentNameIndex
256 // On Success, The child index of the added element with respect to the parent is returned and can be used to add
257 // children to the new element
258 // On Failure, return -1
259 /*
260 unsigned int XmlDocument::addDataElement ( std::string field, std::string value, std::string parentName, unsigned int parentNameIndex)
261 {
262  DOMNodeList *nodeList = theDocument_->getElementsByTagName(CONVERT_TO_XML(parentName));
263 
264  if(parentNameIndex >= nodeList->getLength()) {
265  __MOUT__ << "illegal parent index attempted in tags with name: " << parentName << ", index: " << parentNameIndex << std::endl;
266  return -1; //illegal index attempted
267  }
268 
269  return addElementToParent(field,value,(DOMElement*)(nodeList->item(parentNameIndex)));
270 }
271 */
272 //==============================================================================
273 //XmlDocument::addDataElement
274 // Add field/value element to XML doc at parentIndexArray (with depth of parent indicated by parentIndexArraySize)
275 // If parentIndexArray = NULL, element is added with <DATA> parent
276 // otherwise, parentIndexArray indicates the parent within the node list for <DATA> where
277 // the element will be added
278 // On Success, The child index of the added element with respect to the parent is returned and can be used to add
279 // children to the new element
280 // On Failure, return -1
281 /*
282 unsigned int XmlDocument::addDataElement ( std::string field, std::string value, unsigned int *parentIndexArray, unsigned int parentIndexArraySize)
283 {
284 
285  //__MOUT__ << "field: " << field << ", value: " << value << ", parent: " << parentIndexArraySize << std::endl;
286 
287  DOMElement *parentEl = dataElement; // initialize parent to <DATA>
288 
289  if(parentIndexArray) //if there passed an array find parent relative to data element
290  {
291  //std::cout << __COUT_HDR_FL__<< "Using Parent Index Array" << std::endl;
292 
293  DOMNodeList *nodeList;
294 
295  //iterate through nested parents based on parentIndexArray
296  unsigned int tmpi,cntNotTxt;
297  for(unsigned int i=0;i<parentIndexArraySize;++i)
298  {
299  nodeList = parentEl->getChildNodes(); //get all children
300  cntNotTxt = 0;
301 
302  //get cntNotTxt to proper non text node
303  for(tmpi=0;tmpi<nodeList->getLength();++tmpi)
304  {
305  if(((DOMElement*)(nodeList->item(tmpi)))->getNodeType() == DOMNode::TEXT_NODE) continue; //skip text nodes
306 
307  if(cntNotTxt == parentIndexArray[i]) break; //at proper parent node!
308  ++cntNotTxt; //else look for next
309  }
310 
311  //in theory, only first child can be text - ignore text node children
312  //if(parentEl->getFirstChild() != NULL && parentEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) ++tmpi;
313 
314  if(tmpi >= nodeList->getLength()) {
315  __MOUT__ << "illegal child index attempted in nested parents: " << parentIndexArray[i] << ", depth: " << i << ", tmpi: " << tmpi << std::endl;
316  return -1; //illegal child index attempted in nested parents
317  }
318 
319  parentEl = (DOMElement*)(nodeList->item(tmpi));
320  }
321  }
322 
323  return addElementToParent(field,value,parentEl);
324 }
325 */
326 //==============================================================================
327 //XmlDocument::addXmlData
328 // Append <DATA> from xmldoc to this XML doc
329 // On Success, The child index within <DATA> of the first element is returned
330 // On Failure, return -1
331 /*
332 unsigned int XmlDocument::addXmlData (XmlDocument *xmldoc)
333 {
334  //
335 
336  int retIndex = dataElement->getChildNodes()->getLength(); //will be index of first appended data element
337 
338  //add all first level child elements of data and recurse on them
339  DOMNodeList *nodeList = xmldoc->dataElement->getChildNodes(); //get all children within data
340  for(unsigned int i = 0; i<nodeList->getLength();++i)
341  {
342  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //ignore text node children
343  continue;
344 
345  recursiveAddElementToParent((DOMElement*)(nodeList->item(i)),dataElement);
346  }
347 
348  return retIndex;
349 }
350 */
351 //==============================================================================
352 //XmlDocument::recursiveAddElementToParent
353 // add currEl and its children tree to parentEl
354 /*
355 void XmlDocument::recursiveAddElementToParent (DOMElement *currEl, DOMElement *parentEl)
356 {
357 std::string field, value = "";
358 
359  //char *tmpField =
360  field = XML_TO_CHAR(currEl->getNodeName());//XML_TO_CHAR(currEl->getNodeName());
361  //field = tmpField;
362  //XMLString::release( &tmpField );
363 
364  if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute
365  value = escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
366 
367  //insert currEl
368  addElementToParent(field,value,parentEl);
369 
370  //insert rest of currEl tree
371  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children of currEl
372  for(unsigned int i = 0; i<nodeList->getLength();++i)
373  {
374  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //ignore text node children
375  continue;
376 
377  recursiveAddElementToParent((DOMElement*)(nodeList->item(i)),currEl);
378  }
379 }
380 */
381 //==============================================================================
382 //XmlDocument::outputXmlDocument
383 // recurse through XML theDocument_ and std out and output to stream parameter if not null
384 void XmlDocument::outputXmlDocument (std::ostringstream *out, bool dispStdOut)
385 {
386  recursiveOutputXmlDocument(theDocument_->getDocumentElement(),out,dispStdOut);
387 }
388 
389 //==============================================================================
390 //XmlDocument::recursiveOutputXmlDocument
391 // recursively printout XML theDocument_ to std out and output stream if not null
392 void XmlDocument::recursiveOutputXmlDocument (xercesc::DOMElement *currEl, std::ostringstream *out, bool dispStdOut, std::string tabStr)
393 {
394  //open field tag
395  if(dispStdOut) std::cout << __COUT_HDR_FL__<< tabStr << "<" << XML_TO_CHAR(currEl->getNodeName()) ;
396  if(out) *out << tabStr << "<" << XML_TO_CHAR(currEl->getNodeName());
397 
398  //insert value if text node child
399  if( currEl->getFirstChild() != NULL &&
400  currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE) //if has a text node first, insert as value attribute
401  {
402  if(dispStdOut) std::cout << " value='" << (XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())) << "'";
403  if(out) *out << " value='" << (XML_TO_CHAR(currEl->getFirstChild()->getNodeValue())) << "'";
404  }
405 
406  xercesc::DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
407 
408  //close opening field tag
409  if(dispStdOut) std::cout << ((nodeList->getLength() == 0 ||
410  (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))? "/":"")
411  << ">" << " len:" << nodeList->getLength() << std::endl;
412  if(out) *out << ((nodeList->getLength() == 0 ||
413  (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))? "/":"")
414  << ">" << std::endl;
415 
416  //insert children
417  std::string newTabStr = tabStr + "\t";
418  for(unsigned int i = 0; i<nodeList->getLength();++i)
419  if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE) //ignore text node children
420  recursiveOutputXmlDocument ((xercesc::DOMElement*)(nodeList->item(i)),out,dispStdOut,newTabStr);
421 
422  //close tag if children
423  if(nodeList->getLength() > 1 || (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() != xercesc::DOMNode::TEXT_NODE))
424  {
425  if(dispStdOut) std::cout << __COUT_HDR_FL__<< tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
426  if(out) *out << tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
427  }
428 }
429 
430 //==============================================================================
431 //XmlDocument::getDataElement
432 // returns the value for field found occurance number of times
433 // returns empty std::string "" if field was not found
434 /*
435 std::string XmlDocument::getDataElement (const std::string field, const unsigned int occurance)
436 {
437  unsigned int count = 0;
438  return recursiveFindElement(theDocument_->getDocumentElement(),field,occurance,count);
439 }
440 */
441 //==============================================================================
442 //XmlDocument::recursiveFindElement
443 // recursively searches and returns the value for field found occurance number of times
444 /*
445 std::string XmlDocument::recursiveFindElement (DOMElement *currEl, const std::string field, const unsigned int occurance, unsigned int &count)
446 {
447  if (XML_TO_CHAR(currEl->getNodeName()) == field && occurance == count++) //found, done!!
448  {
449  if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) //if has a text node first, return as value attribute
450  return escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
451  else
452  return ""; //empty value attribute
453  }
454 
455  std::string retStr;
456  //look through children recursively
457  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
458  for(unsigned int i = 0; i<nodeList->getLength();++i)
459  if(nodeList->item(i)->getNodeType() != DOMNode::TEXT_NODE) //ignore text node children
460  {
461  retStr = recursiveFindElement ((DOMElement*)(nodeList->item(i)),field,occurance,count);
462  if(retStr != "") return retStr; //found among children already, done
463  //else continue search within children recursively
464  }
465  return ""; //nothing found
466 }
467 */
468 //==============================================================================
469 //XmlDocument::getAllDataElements
470 // returns all of the values found for the field in a vector
471 // if none found vector will have size 0
472 /*
473 std::vector<std::string> XmlDocument::getAllDataElements (std::string field)
474 {
475  vector<string> retVec;
476 
477  recursiveFindAllElements(theDocument_->getDocumentElement(),field,&retVec);
478 
479  return retVec;
480 }
481 */
482 //==============================================================================
483 //XmlDocument::recursiveFindAllElements
484 // recursively searches and appends to retVec the value of every element named field
485 /*
486 void XmlDocument::recursiveFindAllElements (DOMElement *currEl, const std::string field,std::vector<std::string> *retVec)
487 {
488  if (XML_TO_CHAR(currEl->getNodeName()) == field &&
489  currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == DOMNode::TEXT_NODE) //if has a text node first, return as value attribute
490  retVec->push_back(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
491 
492 
493  //look through children recursively
494  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
495  for(unsigned int i = 0; i<nodeList->getLength();++i)
496  if(nodeList->item(i)->getNodeType() != DOMNode::TEXT_NODE) //ignore text node children
497  recursiveFindAllElements ((DOMElement*)(nodeList->item(i)),field,retVec);
498 }
499 */
500 //==============================================================================
501 //XmlDocument::escapeString
502 // convert quotes to html quote characters &apos; = ' and &quot; = "
503 // remove new line characters
504 // and (if !allowWhiteSpace) remove white space (so that read from file white space artifact removed)
505 //
506 // convert &amp; = &
507 // if(allowWhiteSpace) convert \t to 8 &#160; spaces and \n to <br>
508 std::string XmlDocument::escapeString(std::string inString, bool allowWhiteSpace)
509 {
510  bool doit = false;
511 
512  unsigned int ws = -1;
513  char htmlTmp[6];
514 
515  for(unsigned int i=0; i<inString.length(); i++)
516  if(inString[i] != ' ')
517  {
518 
519 
520  if(doit) std::cout << __COUT_HDR_FL__<< inString[i] << ":" <<
521  (int)inString[i] << ":" << inString << std::endl;
522 
523  //remove new lines and unprintable characters
524  if(inString[i] == '\r' || inString[i] == '\n' ||//remove new line chars
525  inString[i] == '\t' || //remove tabs
526  inString[i] < 32 || //remove un-printable characters (they mess up xml interpretation)
527  (inString[i] > char(126) && inString[i] < char(161))) //this is aggravated by the bug in MFextensions (though Eric says he fixed on 8/24/2016)
528  //Note: greater than 255 should be impossible if by byte (but there are html chracters in 300s and 8000s)
529  {
530  if(allowWhiteSpace && //maintain new lines and tabs
531  inString[i] == '\n')
532  {
533  sprintf(htmlTmp,"&#%3.3d",inString[i]);
534  inString.insert(i,htmlTmp); //insert html str sequence
535  inString.replace(i+5,1,1,';'); // replace special character with ;
536  i+=6; //skip to next char to check
537  --i;
538  }
539  else if(allowWhiteSpace && //maintain new lines and tabs
540  inString[i] == '\t')
541  {
542  if(0)
543  {
544  //tab = 8 spaces
545  sprintf(htmlTmp,"&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160");
546  inString.insert(i,htmlTmp); //insert html str sequence
547  inString.replace(i+47,1,1,';'); // replace special character with ;
548  i+=48; //skip to next char to check
549  --i;
550  }
551  else //tab = 0x09
552  {
553 
554  sprintf(htmlTmp,"&#009");
555  inString.insert(i,htmlTmp); //insert html str sequence
556  inString.replace(i+5,1,1,';'); // replace special character with ;
557  i+=6; //skip to next char to check
558  --i;
559  }
560  }
561  else
562  {
563  inString.erase(i,1); //erase character
564  --i; //step back so next char to check is correct
565  }
566  if(doit) std::cout << __COUT_HDR_FL__<< inString << std::endl;
567  continue;
568  }
569 
570  if(doit) std::cout << __COUT_HDR_FL__<< inString << std::endl;
571 
572  //replace special characters
573  if(inString[i] == '\"' || inString[i] == '\'')
574  {
575  inString.insert(i,(inString[i] == '\'')?"&apos":"&quot"); //insert HTML name before quotes
576  inString.replace(i+5,1,1,';'); // replace special character with ;
577  i+=5; //skip to next char to check
578  //std::cout << __COUT_HDR_FL__<< inString << std::endl;
579  }
580  else if(inString[i] == '&')
581  {
582  inString.insert(i,"&amp"); //insert HTML name before special character
583  inString.replace(i+4,1,1,';'); // replace special character with ;
584  i+=4; //skip to next char to check
585  }
586  else if(inString[i] == '<' || inString[i] == '>')
587  {
588  inString.insert(i,(inString[i] == '<')?"&lt":"&gt"); //insert HTML name before special character
589  inString.replace(i+3,1,1,';'); // replace special character with ;
590  i+=3; //skip to next char to check
591  }
592  else if(inString[i] >= char(161) && inString[i] <= char(255)) //printable special characters
593  {
594  sprintf(htmlTmp,"&#%3.3d",inString[i]);
595  inString.insert(i,htmlTmp); //insert html number sequence
596  inString.replace(i+5,1,1,';'); // replace special character with ;
597  i+=5; //skip to next char to check
598  }
599 
600  if(doit) std::cout << __COUT_HDR_FL__<< inString << std::endl;
601 
602  ws = i; //last non white space char
603  }
604  else if(allowWhiteSpace) //keep white space if allowed
605  {
606  if(i-1 == ws) continue; //dont do anything for first white space
607 
608  //for second white space add 2, and 1 from then
609  if(i-2 == ws)
610  {
611  inString.insert(i,"&#160;"); //insert html space
612  i+=6; //skip to point at space again
613  }
614  inString.insert(i,"&#160"); //insert html space
615  inString.replace(i+5,1,1,';'); // replace special character with ;
616  i+=5; //skip to next char to check
617  ws = i;
618  }
619 
620  if(doit) std::cout << __COUT_HDR_FL__<< inString.size() << " " << ws << std::endl;
621 
622  inString.substr(0,ws+1);
623 
624  if(doit) std::cout << __COUT_HDR_FL__<< inString.size() << " " << inString << std::endl;
625 
626  if(ws == (unsigned int)-1) return ""; //empty std::string since all white space
627  return inString.substr(0,ws+1); //trim right white space
628 }
629 
630 
631 //==============================================================================
632 //XmlDocument::recursiveRemoveChild
633 // remove child and all of child's sub-tree from parent
634 void XmlDocument::recursiveRemoveChild(xercesc::DOMElement *childEl, xercesc::DOMElement *parentEl)
635 {
636  //release child's children first
637  xercesc::DOMNodeList* nodeList = childEl->getChildNodes(); //get all children within data
638  for(unsigned int i = 0; i<nodeList->getLength(); ++i)
639  recursiveRemoveChild((xercesc::DOMElement*)(nodeList->item(nodeList->getLength()-1-i)),childEl);
640 
641  //then release child
642  parentEl->removeChild(childEl);
643  childEl->release();
644 }
645 
646 //==============================================================================
647 //XmlDocument::saveXmlDocument
648 // wrapper for private outputXML
649 // Warning: filePath must be accessible or program will crash!
650 void XmlDocument::saveXmlDocument (std::string filePath)
651 {
652  std::cout << __COUT_HDR_FL__<< "Saving theDocument_ to file: " << filePath << std::endl;
653  //Return the first registered theImplementation_ that has the desired features. In this case, we are after a DOM theImplementation_ that has the LS feature... or Load/Save.
654  //DOMImplementation *theImplementation_ = DOMImplementationRegistry::getDOMImplementation(L"LS");
655  xercesc::DOMImplementation *saveImplementation = xercesc::DOMImplementationRegistry::getDOMImplementation(CONVERT_TO_XML("LS"));
656 
657  std::cout << __COUT_HDR_FL__<< "XERCES Version: " << _XERCES_VERSION << std::endl;
658 
659 #if _XERCES_VERSION >= 30000
660 
661  //std::cout << __COUT_HDR_FL__<< "making file" << filePath << std::endl;
662  // Create a DOMLSSerializer which is used to serialize a DOM tree into an XML theDocument_.
663  xercesc::DOMLSSerializer *serializer = ((xercesc::DOMImplementationLS*)saveImplementation)->createLSSerializer();
664 
665  // Make the output more human readable by inserting line feeds.
666  if (serializer->getDomConfig()->canSetParameter(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true))
667  serializer->getDomConfig()->setParameter(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true);
668 
669  // The end-of-line sequence of characters to be used in the XML being written out.
670  serializer->setNewLine(CONVERT_TO_XML("\r\n"));
671 
672  // Convert the path into Xerces compatible XMLCh*.
673  //XMLCh *tempFilePath = const_cast<XMLCh*>(CONVERT_TO_XML(filePath));
674 
675  // Specify the target for the XML output.
676  xercesc::XMLFormatTarget* formatTarget;
677  try
678  {
679  //formatTarget = new xercesc::LocalFileFormatTarget(tempFilePath);
680  formatTarget = new xercesc::LocalFileFormatTarget(filePath.c_str());
681  }
682  catch(...)
683  {
684  std::cout << __COUT_HDR_FL__<< "Inaccessible file path: " << filePath << std::endl;
685  serializer->release();
686  //xercesc::XMLString::release(&tempFilePath);
687 
688  return;
689  }
690 
691  // Create a new empty output destination object.
692  xercesc::DOMLSOutput *output = ((xercesc::DOMImplementationLS*)saveImplementation)->createLSOutput();
693 
694  // Set the stream to our target.
695  output->setByteStream(formatTarget);
696  // Write the serialized output to the destination.
697  serializer->write(theDocument_, output);
698  serializer->release();
699  //xercesc::XMLString::release(&tempFilePath);
700  delete formatTarget;
701 #else
702 
703  xercesc::DOMWriter *serializer = ((xercesc::DOMImplementationLS*)saveImplementation)->createDOMWriter();
704  serializer->setFeature(xercesc::XMLUni::fgDOMWRTFormatPrettyPrint, true);
705 
706  /*
707  Choose a location for the serialized output. The 3 options are:
708  1) StdOutFormatTarget (std output stream - good for debugging)
709  2) MemBufFormatTarget (to Memory)
710  3) LocalFileFormatTarget (save to file)
711  (Note: You'll need a different header file for each one)
712  */
713  //XMLFormatTarget* pTarget = new StdOutFormatTarget();
714  // Convert the path into Xerces compatible XMLCh*.
715  XMLCh *tempFilePath = xercesc::XMLString::transcode(filePath.c_str());
716  xercesc::XMLFormatTarget* formatTarget;
717  try
718  {
719  formatTarget = new xercesc::LocalFileFormatTarget(tempFilePath);
720  }
721  catch(...)
722  {
723  std::cout << __COUT_HDR_FL__<< "Inaccessible file path: " << filePath << std::endl;
724  serializer->release();
725  xercesc::XMLString::release(&tempFilePath);
726  return;
727  }
728 
729  // Write the serialized output to the target.
730 
731  serializer->writeNode(formatTarget, *theDocument_);
732  serializer->release();
733  xercesc::XMLString::release(&tempFilePath);
734  delete formatTarget;
735 #endif
736 
737  // Cleanup.
738  //std::cout << __COUT_HDR_FL__<< "delete format target" << std::endl;
739 
740 
741 #if _XERCES_VERSION >= 30000
742 
743  //std::cout << __COUT_HDR_FL__<< "delete output0" << std::endl;
744  output->release();
745  //std::cout << __COUT_HDR_FL__<< "delete output1" << std::endl;
746 
747 #endif
748 }
749 
750 
751 //==============================================================================
752 bool XmlDocument::loadXmlDocument (std::string filePath)
753 {
754  std::cout << __COUT_HDR_FL__<< "Loading theDocument_ from file: " << filePath << std::endl;
755 
756  struct stat fileStatus;
757 
758  if(stat(filePath.c_str(), &fileStatus) != 0)
759  {
760  std::cout << __COUT_HDR_FL__<< "File not accessible." << std::endl;
761  return false;
762  }
763 
764  //reset xml platform and theDocument_
765  terminatePlatform();
766  initPlatform();
767 
768  xercesc::XercesDOMParser* parser = new xercesc::XercesDOMParser;
769  // Configure xercesc::DOM parser.
770  parser->setValidationScheme(xercesc::XercesDOMParser::Val_Auto);
771  parser->setDoNamespaces ( true );
772  parser->setDoSchema ( true );
773  parser->useCachedGrammarInParse ( false );
774 
775  try
776  {
777  parser->parse( filePath.c_str() );
778 
779  //theDocument_ memory object owned by the parent parser object
780  theDocument_ = parser->adoptDocument();//instead of getDocument() so parser will not free theDocument_ when released
781 
782  // Get the top-level element: Name is "root". No attributes for "root"
783  rootElement_ = theDocument_->getDocumentElement();
784  if( !rootElement_ )
785  throw(std::runtime_error( "empty XML theDocument_" ));
786 
787  }
788  catch( xercesc::XMLException& e )
789  {
790  std::cout << __COUT_HDR_FL__<< "Error parsing file." << std::endl;
791  return false;
792  }
793  delete parser;
794 
795  return true;
796 }
797 
798 
799 //==============================================================================
800 //XmlDocument::recursiveFixTextFields
801 // recursively replace the value of every text node under currEl with its escaped version
802 /*
803 void XmlDocument::recursiveFixTextFields(DOMElement *currEl)
804 {
805  DOMNodeList *nodeList = currEl->getChildNodes(); //get all children
806 
807  //recurse through children
808  for(unsigned int i = 0; i<nodeList->getLength();++i)
809  if(nodeList->item(i)->getNodeType() == DOMNode::TEXT_NODE) //fix text nodes
810  ((DOMElement*)(nodeList->item(i)))->setTextContent(CONVERT_TO_XML( //change text value to escaped version
811  escapeString(XML_TO_CHAR(((DOMElement*)(nodeList->item(i)))->getNodeValue()))));
812  else
813  recursiveFixTextFields ((DOMElement*)(nodeList->item(i)));
814 }
815 */
816