00001 #include "otsdaq-core/XmlUtilities/HttpXmlDocument.h"
00002 #include "otsdaq-core/XmlUtilities/ConvertToXML.h"
00003 #include "otsdaq-core/XmlUtilities/ConvertFromXML.h"
00004 #include "otsdaq-core/MessageFacility/MessageFacility.h"
00005 #include "otsdaq-core/Macros/CoutHeaderMacros.h"
00006
00007
00008 #include <stdexcept>
00009 #include <xercesc/dom/DOM.hpp>
00010 #include <xercesc/dom/DOMDocument.hpp>
00011 #include <xercesc/dom/DOMDocumentType.hpp>
00012 #include <xercesc/dom/DOMElement.hpp>
00013 #include <xercesc/dom/DOMImplementation.hpp>
00014 #include <xercesc/dom/DOMImplementationRegistry.hpp>
00015 #include <xercesc/dom/DOMImplementationLS.hpp>
00016
00017
00018 #include <xercesc/dom/DOMNodeIterator.hpp>
00019 #include <xercesc/dom/DOMNodeList.hpp>
00020 #include <xercesc/dom/DOMText.hpp>
00021 #include <xercesc/validators/common/Grammar.hpp>
00022
00023 #include <xercesc/parsers/XercesDOMParser.hpp>
00024 #include <xercesc/util/XMLUni.hpp>
00025 #include <xercesc/util/XercesDefs.hpp>
00026
00027 #include <xercesc/util/OutOfMemoryException.hpp>
00028 #include <xercesc/framework/LocalFileFormatTarget.hpp>
00029
00030 #include <iostream>
00031 #include <sstream>
00032 #include <list>
00033
00034 #include <sys/types.h>
00035 #include <sys/stat.h>
00036 #include <unistd.h>
00037 #include <errno.h>
00038
00039 using namespace ots;
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053 HttpXmlDocument::HttpXmlDocument(std::string cookieCode, std::string displayName) :
00054 XmlDocument ("ROOT"),
00055 headerElement_ (0),
00056 dataElement_ (0),
00057 headerTagName_ ("HEADER"),
00058 dataTagName_ ("DATA"),
00059 cookieCodeTagName_ ("CookieCode"),
00060 displayNameTagName_("DisplayName")
00061 {
00062
00063
00064
00065 if(cookieCode != "" || displayName != "")
00066 {
00067 headerElement_ = theDocument_->createElement(CONVERT_TO_XML(headerTagName_));
00068 rootElement_->appendChild(headerElement_);
00069 if(cookieCode != "")
00070 addTextElementToParent(cookieCodeTagName_,cookieCode,headerElement_);
00071 if(displayName != "")
00072 addTextElementToParent(displayNameTagName_,displayName,headerElement_);
00073 }
00074
00075
00076 dataElement_ = theDocument_->createElement(CONVERT_TO_XML(dataTagName_));
00077 rootElement_->appendChild(dataElement_);
00078
00079 }
00080
00081
00082 HttpXmlDocument::HttpXmlDocument(const HttpXmlDocument& doc) :
00083 XmlDocument(doc),
00084 headerElement_ (0),
00085 dataElement_ (0),
00086 headerTagName_ (doc.headerTagName_),
00087 dataTagName_ (doc.dataTagName_),
00088 cookieCodeTagName_ (doc.cookieCodeTagName_),
00089 displayNameTagName_(doc.displayNameTagName_)
00090 {
00091
00092 *this = doc;
00093
00094 }
00095
00096
00097 HttpXmlDocument& HttpXmlDocument::operator=(const HttpXmlDocument& doc)
00098 {
00099
00100 recursiveElementCopy(doc.rootElement_, rootElement_);
00101 if(doc.headerElement_ != 0)
00102 headerElement_ = (xercesc::DOMElement*)rootElement_->getElementsByTagName(CONVERT_TO_XML(headerTagName_))->item(0);
00103 dataElement_ = (xercesc::DOMElement*)rootElement_->getElementsByTagName(CONVERT_TO_XML(dataTagName_))->item(0);
00104
00105 return *this;
00106 }
00107
00108
00109 HttpXmlDocument::~HttpXmlDocument(void)
00110 {}
00111
00112 void HttpXmlDocument::setHeader(std::string cookieCode, std::string displayName)
00113 {
00114 if(headerElement_)
00115 {
00116 std::stringstream ss;
00117 ss << __COUT_HDR_FL__ <<
00118 "Can NOT set header to doc with a header! Only allowed for docs without header element.";
00119 throw std::runtime_error(ss.str());
00120 }
00121
00122
00123 if(cookieCode != "" || displayName != "")
00124 {
00125 headerElement_ = theDocument_->createElement(CONVERT_TO_XML(headerTagName_));
00126 rootElement_->appendChild(headerElement_);
00127 if(cookieCode != "")
00128 addTextElementToParent(cookieCodeTagName_,cookieCode,headerElement_);
00129 if(displayName != "")
00130 addTextElementToParent(displayNameTagName_,displayName,headerElement_);
00131 }
00132 }
00133
00134
00135 xercesc::DOMElement* HttpXmlDocument::addTextElementToData(const std::string &childName, const std::string &childValue)
00136 {
00137
00138 return addTextElementToParent(childName,childValue,dataElement_);
00139 }
00140
00141
00142
00143
00144 unsigned int HttpXmlDocument::getChildrenCount(xercesc::DOMElement* parent)
00145 {
00146 if(!parent) parent = dataElement_;
00147
00148 xercesc::DOMNodeList* nodeList = parent->getChildNodes();
00149 unsigned int count = 0;
00150
00151 for(unsigned int i = 0; i<nodeList->getLength();++i) {
00152 if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE)
00153 ++count;
00154 }
00155
00156 return count;
00157 }
00158
00159
00160
00161
00162
00163 void HttpXmlDocument::removeDataElement(unsigned int dataChildIndex)
00164 {
00165 xercesc::DOMNodeList* nodeList = dataElement_->getChildNodes();
00166
00167 for(unsigned int i = 0; i<nodeList->getLength();++i)
00168 {
00169 if(nodeList->item(i)->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00170 continue;
00171
00172 if(!dataChildIndex)
00173 {
00174 recursiveRemoveChild((xercesc::DOMElement*)(nodeList->item(i)),dataElement_);
00175 return;
00176 }
00177
00178 --dataChildIndex;
00179 }
00180
00181
00182 }
00183
00184
00185
00186
00187 void HttpXmlDocument::copyDataChildren(HttpXmlDocument &document)
00188 {
00189
00190 xercesc::DOMNodeList* nodeList = document.dataElement_->getChildNodes();
00191 for(unsigned int i = 0; i<nodeList->getLength();++i)
00192 {
00193 if(nodeList->item(i)->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00194 continue;
00195
00196 recursiveAddElementToParent((xercesc::DOMElement*)(nodeList->item(i)),dataElement_,true);
00197 }
00198 }
00199
00200
00201
00202
00203 void HttpXmlDocument::outputXmlDocument (std::ostringstream *out, bool dispStdOut, bool allowWhiteSpace)
00204 {
00205 recursiveOutputXmlDocument(theDocument_->getDocumentElement(),out,dispStdOut, "", allowWhiteSpace);
00206 }
00207
00208
00209
00210
00211 void HttpXmlDocument::recursiveOutputXmlDocument (xercesc::DOMElement *currEl, std::ostringstream *out,
00212 bool dispStdOut, std::string tabStr, bool allowWhiteSpace)
00213 {
00214
00215 if(dispStdOut) std::cout << __COUT_HDR_FL__<< tabStr << "<" << XML_TO_CHAR(currEl->getNodeName()) ;
00216 if(out) *out << tabStr << "<" << XML_TO_CHAR(currEl->getNodeName());
00217
00218
00219 if( currEl->getFirstChild() != NULL &&
00220 currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00221 {
00222 if(dispStdOut) std::cout << " value='" <<
00223 escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()),allowWhiteSpace) << "'";
00224 if(out) *out << " value='" <<
00225 escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()),allowWhiteSpace) << "'";
00226 }
00227
00228 xercesc::DOMNodeList *nodeList = currEl->getChildNodes();
00229
00230
00231 if(dispStdOut) std::cout << ((nodeList->getLength() == 0 ||
00232 (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))? "/":"")
00233 << ">" << " len:" << nodeList->getLength() << std::endl;
00234 if(out) *out << ((nodeList->getLength() == 0 ||
00235 (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))? "/":"")
00236 << ">" << std::endl;
00237
00238
00239 std::string newTabStr = tabStr + "\t";
00240 for(unsigned int i = 0; i<nodeList->getLength();++i)
00241 if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE)
00242 recursiveOutputXmlDocument ((xercesc::DOMElement*)(nodeList->item(i)),out,dispStdOut,newTabStr,allowWhiteSpace);
00243
00244
00245 if(nodeList->getLength() > 1 || (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() != xercesc::DOMNode::TEXT_NODE))
00246 {
00247 if(dispStdOut) std::cout << __COUT_HDR_FL__<< tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
00248 if(out) *out << tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
00249 }
00250 }
00251
00252
00253
00254
00255
00256 std::string HttpXmlDocument::getMatchingValue (const std::string &field, const unsigned int occurance)
00257 {
00258 unsigned int count = 0;
00259 return recursiveFindElementValue(theDocument_->getDocumentElement(),field,occurance,count);
00260 }
00261
00262
00263
00264
00265 std::string HttpXmlDocument::recursiveFindElementValue (xercesc::DOMElement *currEl, const std::string &field, const unsigned int occurance, unsigned int &count)
00266 {
00267 if (XML_TO_CHAR(currEl->getNodeName()) == field && occurance == count++)
00268 {
00269 if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00270 return escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
00271 else
00272 return "";
00273 }
00274
00275 std::string retStr;
00276
00277 xercesc::DOMNodeList *nodeList = currEl->getChildNodes();
00278 for(unsigned int i = 0; i<nodeList->getLength();++i)
00279 if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE)
00280 {
00281 retStr = recursiveFindElementValue ((xercesc::DOMElement*)(nodeList->item(i)),field,occurance,count);
00282 if(retStr != "") return retStr;
00283
00284 }
00285 return "";
00286 }
00287
00288
00289
00290
00291
00292 void HttpXmlDocument::getAllMatchingValues (const std::string &field, std::vector<std::string> &retVec)
00293 {
00294 recursiveFindAllElements(theDocument_->getDocumentElement(),field,&retVec);
00295 }
00296
00297
00298
00299
00300 void HttpXmlDocument::recursiveFindAllElements (xercesc::DOMElement *currEl, const std::string &field,std::vector<std::string> *retVec)
00301 {
00302 if (XML_TO_CHAR(currEl->getNodeName()) == field &&
00303 currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00304 retVec->push_back(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
00305
00306
00307
00308 xercesc::DOMNodeList *nodeList = currEl->getChildNodes();
00309 for(unsigned int i = 0; i<nodeList->getLength();++i)
00310 if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE)
00311 recursiveFindAllElements ((xercesc::DOMElement*)(nodeList->item(i)),field,retVec);
00312 }
00313
00314
00315
00316
00317
00318 xercesc::DOMElement* HttpXmlDocument::getMatchingElement (const std::string &field, const unsigned int occurance)
00319 {
00320 return getMatchingElementInSubtree(theDocument_->getDocumentElement(),field,occurance);
00321 }
00322
00323
00324
00325
00326
00327
00328 xercesc::DOMElement* HttpXmlDocument::getMatchingElementInSubtree (xercesc::DOMElement *parentEl, const std::string &field,
00329 const unsigned int occurance)
00330 {
00331 unsigned int count = 0;
00332 return recursiveFindElement(parentEl,field,occurance,count);
00333 }
00334
00335
00336
00337
00338 xercesc::DOMElement* HttpXmlDocument::recursiveFindElement (xercesc::DOMElement *currEl, const std::string &field,
00339 const unsigned int occurance, unsigned int &count)
00340 {
00341 if (XML_TO_CHAR(currEl->getNodeName()) == field && occurance == count++)
00342 {
00343 if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00344 return currEl;
00345 else
00346 return 0;
00347 }
00348
00349 xercesc::DOMElement* retEl;
00350
00351 xercesc::DOMNodeList *nodeList = currEl->getChildNodes();
00352 for(unsigned int i = 0; i<nodeList->getLength();++i)
00353 if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE)
00354 {
00355 retEl = recursiveFindElement ((xercesc::DOMElement*)(nodeList->item(i)),field,occurance,count);
00356 if(retEl) return retEl;
00357
00358 }
00359 return 0;
00360 }
00361
00362
00363
00364
00365
00366 void HttpXmlDocument::recursiveAddElementToParent(xercesc::DOMElement* child, xercesc::DOMElement* parent, bool html)
00367 {
00368 std::string childText = "";
00369
00370 std::string childName = XML_TO_CHAR(child->getNodeName());
00371
00372 if( child->getFirstChild() != NULL && child->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00373 {
00374 childText = XML_TO_CHAR(child->getFirstChild()->getNodeValue());
00375 if(html)
00376 childText = escapeString(childText);
00377 }
00378
00379
00380
00381 xercesc::DOMElement* newParent = addTextElementToParent(childName, childText, parent);
00382
00383
00384 xercesc::DOMNodeList *nodeList = child->getChildNodes();
00385 for(unsigned int i = 0; i<nodeList->getLength();++i)
00386 {
00387 if(nodeList->item(i)->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00388 continue;
00389
00390 recursiveAddElementToParent((xercesc::DOMElement*)(nodeList->item(i)), newParent, html);
00391 }
00392 }
00393
00394
00395
00396
00397
00398 void HttpXmlDocument::getAllMatchingElements (const std::string &field, std::vector<xercesc::DOMElement*> &retVec)
00399 {
00400 recursiveFindAllElements(theDocument_->getDocumentElement(),field,&retVec);
00401 }
00402
00403
00404
00405
00406
00407 void HttpXmlDocument::recursiveFindAllElements (xercesc::DOMElement *currEl, const std::string &field,std::vector<xercesc::DOMElement*> *retVec)
00408 {
00409 if (XML_TO_CHAR(currEl->getNodeName()) == field &&
00410 currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00411 retVec->push_back(currEl);
00412
00413
00414
00415 xercesc::DOMNodeList *nodeList = currEl->getChildNodes();
00416 for(unsigned int i = 0; i<nodeList->getLength();++i)
00417 if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE)
00418 recursiveFindAllElements ((xercesc::DOMElement*)(nodeList->item(i)),field,retVec);
00419 }
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455 bool HttpXmlDocument::loadXmlDocument (const std::string &filePath)
00456 {
00457
00458
00459 struct stat fileStatus;
00460
00461 if(stat(filePath.c_str(), &fileStatus) != 0)
00462 {
00463 std::cout << __COUT_HDR_FL__<< "File not accessible." << std::endl;
00464 return false;
00465 }
00466
00467
00468 terminatePlatform();
00469 initPlatform();
00470
00471 xercesc::XercesDOMParser* parser = new xercesc::XercesDOMParser;
00472
00473 parser->setValidationScheme(xercesc::XercesDOMParser::Val_Auto);
00474 parser->setDoNamespaces ( true );
00475 parser->setDoSchema ( true );
00476 parser->useCachedGrammarInParse ( false );
00477
00478 try
00479 {
00480 parser->parse( filePath.c_str() );
00481
00482
00483 theDocument_ = parser->adoptDocument();
00484
00485
00486 rootElement_ = theDocument_->getDocumentElement();
00487 if( !rootElement_ )
00488 throw(std::runtime_error( "empty XML theDocument_" ));
00489
00490 recursiveFixTextFields(rootElement_);
00491
00492 xercesc::DOMNodeList *nodeList = theDocument_->getElementsByTagName(CONVERT_TO_XML(headerTagName_));
00493 if(nodeList->getLength())
00494 headerElement_ = (xercesc::DOMElement*)(theDocument_->getElementsByTagName(CONVERT_TO_XML(headerTagName_))->item(0));
00495 else
00496 headerElement_ = 0;
00497
00498 dataElement_ = (xercesc::DOMElement*)(theDocument_->getElementsByTagName(CONVERT_TO_XML(dataTagName_))->item(0));
00499 }
00500 catch( xercesc::XMLException& e )
00501 {
00502 std::cout << __COUT_HDR_FL__<< "Error parsing file." << std::endl;
00503 return false;
00504 }
00505 delete parser;
00506
00507 return true;
00508 }
00509
00510
00511
00512
00513 void HttpXmlDocument::recursiveFixTextFields(xercesc::DOMElement *currEl)
00514 {
00515 xercesc::DOMNodeList *nodeList = currEl->getChildNodes();
00516
00517
00518 for(unsigned int i = 0; i<nodeList->getLength();++i)
00519 if(nodeList->item(i)->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00520 ((xercesc::DOMElement*)(nodeList->item(i)))->setTextContent(CONVERT_TO_XML(
00521 escapeString(XML_TO_CHAR(((xercesc::DOMElement*)(nodeList->item(i)))->getNodeValue()))));
00522 else
00523 recursiveFixTextFields ((xercesc::DOMElement*)(nodeList->item(i)));
00524 }
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581