00001 #include "otsdaq-core/XmlUtilities/HttpXmlDocument.h"
00002 #include "otsdaq-core/XmlUtilities/ConvertToXML.h"
00003 #include "otsdaq-core/XmlUtilities/ConvertFromXML.h"
00004 #include "otsdaq-core/MessageFacility/MessageFacility.h"
00005 #include "otsdaq-core/Macros/CoutMacros.h"
00006
00007
00008 #include <stdexcept>
00009 #include <xercesc/dom/DOM.hpp>
00010 #include <xercesc/dom/DOMDocument.hpp>
00011 #include <xercesc/dom/DOMDocumentType.hpp>
00012 #include <xercesc/dom/DOMElement.hpp>
00013 #include <xercesc/dom/DOMImplementation.hpp>
00014 #include <xercesc/dom/DOMImplementationRegistry.hpp>
00015 #include <xercesc/dom/DOMImplementationLS.hpp>
00016
00017
00018 #include <xercesc/dom/DOMNodeIterator.hpp>
00019 #include <xercesc/dom/DOMNodeList.hpp>
00020 #include <xercesc/dom/DOMText.hpp>
00021 #include <xercesc/validators/common/Grammar.hpp>
00022
00023 #include <xercesc/parsers/XercesDOMParser.hpp>
00024 #include <xercesc/util/XMLUni.hpp>
00025 #include <xercesc/util/XercesDefs.hpp>
00026
00027 #include <xercesc/util/OutOfMemoryException.hpp>
00028 #include <xercesc/framework/LocalFileFormatTarget.hpp>
00029
00030 #include <iostream>
00031 #include <sstream>
00032 #include <list>
00033
00034 #include <sys/types.h>
00035 #include <sys/stat.h>
00036 #include <unistd.h>
00037 #include <errno.h>
00038
00039 using namespace ots;
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053 HttpXmlDocument::HttpXmlDocument(std::string cookieCode, std::string displayName) :
00054 XmlDocument ("ROOT"),
00055 headerElement_ (0),
00056 dataElement_ (0),
00057 headerTagName_ ("HEADER"),
00058 dataTagName_ ("DATA"),
00059 cookieCodeTagName_ ("CookieCode"),
00060 displayNameTagName_("DisplayName")
00061 {
00062
00063
00064
00065 if(cookieCode != "" || displayName != "")
00066 {
00067 headerElement_ = theDocument_->createElement(CONVERT_TO_XML(headerTagName_));
00068 rootElement_->appendChild(headerElement_);
00069 if(cookieCode != "")
00070 addTextElementToParent(cookieCodeTagName_,cookieCode,headerElement_);
00071 if(displayName != "")
00072 addTextElementToParent(displayNameTagName_,displayName,headerElement_);
00073 }
00074
00075
00076 dataElement_ = theDocument_->createElement(CONVERT_TO_XML(dataTagName_));
00077 rootElement_->appendChild(dataElement_);
00078
00079 }
00080
00081
00082 HttpXmlDocument::HttpXmlDocument(const HttpXmlDocument& doc) :
00083 XmlDocument(doc),
00084 headerElement_ (0),
00085 dataElement_ (0),
00086 headerTagName_ (doc.headerTagName_),
00087 dataTagName_ (doc.dataTagName_),
00088 cookieCodeTagName_ (doc.cookieCodeTagName_),
00089 displayNameTagName_(doc.displayNameTagName_)
00090 {
00091
00092 *this = doc;
00093
00094 }
00095
00096
00097 HttpXmlDocument& HttpXmlDocument::operator=(const HttpXmlDocument& doc)
00098 {
00099
00100 recursiveElementCopy(doc.rootElement_, rootElement_);
00101 if(doc.headerElement_ != 0)
00102 headerElement_ = (xercesc::DOMElement*)rootElement_->getElementsByTagName(CONVERT_TO_XML(headerTagName_))->item(0);
00103 dataElement_ = (xercesc::DOMElement*)rootElement_->getElementsByTagName(CONVERT_TO_XML(dataTagName_))->item(0);
00104
00105 return *this;
00106 }
00107
00108
00109 HttpXmlDocument::~HttpXmlDocument(void)
00110 {}
00111
00112 void HttpXmlDocument::setHeader(std::string cookieCode, std::string displayName)
00113 {
00114 if(headerElement_)
00115 {
00116 std::stringstream ss;
00117 ss << __COUT_HDR_FL__ <<
00118 "Can NOT set header to doc with a header! Only allowed for docs without header element.";
00119 throw std::runtime_error(ss.str());
00120 }
00121
00122
00123 if(cookieCode != "" || displayName != "")
00124 {
00125 headerElement_ = theDocument_->createElement(CONVERT_TO_XML(headerTagName_));
00126 rootElement_->appendChild(headerElement_);
00127 if(cookieCode != "")
00128 addTextElementToParent(cookieCodeTagName_,cookieCode,headerElement_);
00129 if(displayName != "")
00130 addTextElementToParent(displayNameTagName_,displayName,headerElement_);
00131 }
00132 }
00133
00134
00135 xercesc::DOMElement* HttpXmlDocument::addTextElementToData(const std::string &childName,
00136 const std::string &childValue)
00137 {
00138
00139 return addTextElementToParent(childName,childValue,dataElement_);
00140 }
00141
00142
00143 xercesc::DOMElement* HttpXmlDocument::addBinaryStringToData(const std::string &childName,
00144 const std::string &binary)
00145 {
00146 std::string convertStr = "";
00147 char hexStr[3];
00148 for(unsigned int i=0;i<binary.length();++i)
00149 {
00150
00151 sprintf(hexStr,"%2.2X",((unsigned char)binary[i]));
00152 hexStr[2] = '\0';
00153 convertStr += hexStr;
00154 }
00155
00156 return addTextElementToParent(childName,convertStr,dataElement_);
00157 }
00158
00159
00160
00161
00162 unsigned int HttpXmlDocument::getChildrenCount(xercesc::DOMElement* parent)
00163 {
00164 if(!parent) parent = dataElement_;
00165
00166 xercesc::DOMNodeList* nodeList = parent->getChildNodes();
00167 unsigned int count = 0;
00168
00169 for(unsigned int i = 0; i<nodeList->getLength();++i) {
00170 if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE)
00171 ++count;
00172 }
00173
00174 return count;
00175 }
00176
00177
00178
00179
00180
00181 void HttpXmlDocument::removeDataElement(unsigned int dataChildIndex)
00182 {
00183 xercesc::DOMNodeList* nodeList = dataElement_->getChildNodes();
00184
00185 for(unsigned int i = 0; i<nodeList->getLength();++i)
00186 {
00187 if(nodeList->item(i)->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00188 continue;
00189
00190 if(!dataChildIndex)
00191 {
00192 recursiveRemoveChild((xercesc::DOMElement*)(nodeList->item(i)),dataElement_);
00193 return;
00194 }
00195
00196 --dataChildIndex;
00197 }
00198
00199
00200 }
00201
00202
00203
00204
00205 void HttpXmlDocument::copyDataChildren(HttpXmlDocument &document)
00206 {
00207
00208 xercesc::DOMNodeList* nodeList = document.dataElement_->getChildNodes();
00209 for(unsigned int i = 0; i<nodeList->getLength();++i)
00210 {
00211 if(nodeList->item(i)->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00212 continue;
00213
00214 recursiveAddElementToParent((xercesc::DOMElement*)(nodeList->item(i)),dataElement_,true);
00215 }
00216 }
00217
00218
00219
00220
00221 void HttpXmlDocument::outputXmlDocument (std::ostringstream *out, bool dispStdOut,
00222 bool allowWhiteSpace)
00223 {
00224 recursiveOutputXmlDocument(theDocument_->getDocumentElement(),out,dispStdOut, "", allowWhiteSpace);
00225 }
00226
00227
00228
00229
00230 void HttpXmlDocument::recursiveOutputXmlDocument (xercesc::DOMElement *currEl, std::ostringstream *out,
00231 bool dispStdOut, std::string tabStr, bool allowWhiteSpace)
00232 {
00233
00234 if(dispStdOut) std::cout << __COUT_HDR_FL__<< tabStr << "<" << XML_TO_CHAR(currEl->getNodeName()) ;
00235 if(out) *out << tabStr << "<" << XML_TO_CHAR(currEl->getNodeName());
00236
00237
00238 if( currEl->getFirstChild() != NULL &&
00239 currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00240 {
00241 if(dispStdOut) std::cout << " value='" <<
00242 escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()),allowWhiteSpace) << "'";
00243 if(out) *out << " value='" <<
00244 escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()),allowWhiteSpace) << "'";
00245 }
00246
00247 xercesc::DOMNodeList *nodeList = currEl->getChildNodes();
00248
00249
00250 if(dispStdOut) std::cout << ((nodeList->getLength() == 0 ||
00251 (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))? "/":"")
00252 << ">" << " len:" << nodeList->getLength() << std::endl;
00253 if(out) *out << ((nodeList->getLength() == 0 ||
00254 (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE))? "/":"")
00255 << ">" << std::endl;
00256
00257
00258 std::string newTabStr = tabStr + "\t";
00259 for(unsigned int i = 0; i<nodeList->getLength();++i)
00260 if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE)
00261 recursiveOutputXmlDocument ((xercesc::DOMElement*)(nodeList->item(i)),out,dispStdOut,newTabStr,allowWhiteSpace);
00262
00263
00264 if(nodeList->getLength() > 1 || (nodeList->getLength() == 1 && currEl->getFirstChild()->getNodeType() != xercesc::DOMNode::TEXT_NODE))
00265 {
00266 if(dispStdOut) std::cout << __COUT_HDR_FL__<< tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
00267 if(out) *out << tabStr << "</" << XML_TO_CHAR(currEl->getNodeName()) << ">" << std::endl;
00268 }
00269 }
00270
00271
00272
00273
00274
00275 std::string HttpXmlDocument::getMatchingValue (const std::string &field, const unsigned int occurance)
00276 {
00277 unsigned int count = 0;
00278 return recursiveFindElementValue(theDocument_->getDocumentElement(),field,occurance,count);
00279 }
00280
00281
00282
00283
00284 std::string HttpXmlDocument::recursiveFindElementValue (xercesc::DOMElement *currEl, const std::string &field,
00285 const unsigned int occurance, unsigned int &count)
00286 {
00287 if (XML_TO_CHAR(currEl->getNodeName()) == field && occurance == count++)
00288 {
00289 if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00290 return escapeString(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
00291 else
00292 return "";
00293 }
00294
00295 std::string retStr;
00296
00297 xercesc::DOMNodeList *nodeList = currEl->getChildNodes();
00298 for(unsigned int i = 0; i<nodeList->getLength();++i)
00299 if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE)
00300 {
00301 retStr = recursiveFindElementValue ((xercesc::DOMElement*)(nodeList->item(i)),field,occurance,count);
00302 if(retStr != "") return retStr;
00303
00304 }
00305 return "";
00306 }
00307
00308
00309
00310
00311
00312 void HttpXmlDocument::getAllMatchingValues (const std::string &field, std::vector<std::string> &retVec)
00313 {
00314 recursiveFindAllElements(theDocument_->getDocumentElement(),field,&retVec);
00315 }
00316
00317
00318
00319
00320 void HttpXmlDocument::recursiveFindAllElements (xercesc::DOMElement *currEl, const std::string &field,std::vector<std::string> *retVec)
00321 {
00322 if (XML_TO_CHAR(currEl->getNodeName()) == field &&
00323 currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00324 retVec->push_back(XML_TO_CHAR(currEl->getFirstChild()->getNodeValue()));
00325
00326
00327
00328 xercesc::DOMNodeList *nodeList = currEl->getChildNodes();
00329 for(unsigned int i = 0; i<nodeList->getLength();++i)
00330 if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE)
00331 recursiveFindAllElements ((xercesc::DOMElement*)(nodeList->item(i)),field,retVec);
00332 }
00333
00334
00335
00336
00337
00338 xercesc::DOMElement* HttpXmlDocument::getMatchingElement (const std::string &field, const unsigned int occurance)
00339 {
00340 return getMatchingElementInSubtree(theDocument_->getDocumentElement(),field,occurance);
00341 }
00342
00343
00344
00345
00346
00347
00348 xercesc::DOMElement* HttpXmlDocument::getMatchingElementInSubtree (xercesc::DOMElement *parentEl, const std::string &field,
00349 const unsigned int occurance)
00350 {
00351 unsigned int count = 0;
00352 return recursiveFindElement(parentEl,field,occurance,count);
00353 }
00354
00355
00356
00357
00358 xercesc::DOMElement* HttpXmlDocument::recursiveFindElement (xercesc::DOMElement *currEl, const std::string &field,
00359 const unsigned int occurance, unsigned int &count)
00360 {
00361 if (XML_TO_CHAR(currEl->getNodeName()) == field && occurance == count++)
00362 {
00363 if( currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00364 return currEl;
00365 else
00366 return 0;
00367 }
00368
00369 xercesc::DOMElement* retEl;
00370
00371 xercesc::DOMNodeList *nodeList = currEl->getChildNodes();
00372 for(unsigned int i = 0; i<nodeList->getLength();++i)
00373 if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE)
00374 {
00375 retEl = recursiveFindElement ((xercesc::DOMElement*)(nodeList->item(i)),field,occurance,count);
00376 if(retEl) return retEl;
00377
00378 }
00379 return 0;
00380 }
00381
00382
00383
00384
00385
00386 void HttpXmlDocument::recursiveAddElementToParent(xercesc::DOMElement* child, xercesc::DOMElement* parent, bool html)
00387 {
00388 std::string childText = "";
00389
00390 std::string childName = XML_TO_CHAR(child->getNodeName());
00391
00392 if( child->getFirstChild() != NULL && child->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00393 {
00394 childText = XML_TO_CHAR(child->getFirstChild()->getNodeValue());
00395 if(html)
00396 childText = escapeString(childText);
00397 }
00398
00399
00400
00401 xercesc::DOMElement* newParent = addTextElementToParent(childName, childText, parent);
00402
00403
00404 xercesc::DOMNodeList *nodeList = child->getChildNodes();
00405 for(unsigned int i = 0; i<nodeList->getLength();++i)
00406 {
00407 if(nodeList->item(i)->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00408 continue;
00409
00410 recursiveAddElementToParent((xercesc::DOMElement*)(nodeList->item(i)), newParent, html);
00411 }
00412 }
00413
00414
00415
00416
00417
00418 void HttpXmlDocument::getAllMatchingElements (const std::string &field, std::vector<xercesc::DOMElement*> &retVec)
00419 {
00420 recursiveFindAllElements(theDocument_->getDocumentElement(),field,&retVec);
00421 }
00422
00423
00424
00425
00426
00427 void HttpXmlDocument::recursiveFindAllElements (xercesc::DOMElement *currEl, const std::string &field,std::vector<xercesc::DOMElement*> *retVec)
00428 {
00429 if (XML_TO_CHAR(currEl->getNodeName()) == field &&
00430 currEl->getFirstChild() != NULL && currEl->getFirstChild()->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00431 retVec->push_back(currEl);
00432
00433
00434
00435 xercesc::DOMNodeList *nodeList = currEl->getChildNodes();
00436 for(unsigned int i = 0; i<nodeList->getLength();++i)
00437 if(nodeList->item(i)->getNodeType() != xercesc::DOMNode::TEXT_NODE)
00438 recursiveFindAllElements ((xercesc::DOMElement*)(nodeList->item(i)),field,retVec);
00439 }
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475 bool HttpXmlDocument::loadXmlDocument (const std::string &filePath)
00476 {
00477
00478
00479 struct stat fileStatus;
00480
00481 if(stat(filePath.c_str(), &fileStatus) != 0)
00482 {
00483 std::cout << __COUT_HDR_FL__<< "File not accessible." << std::endl;
00484 return false;
00485 }
00486
00487
00488 terminatePlatform();
00489 initPlatform();
00490
00491 xercesc::XercesDOMParser* parser = new xercesc::XercesDOMParser;
00492
00493 parser->setValidationScheme(xercesc::XercesDOMParser::Val_Auto);
00494 parser->setDoNamespaces ( true );
00495 parser->setDoSchema ( true );
00496 parser->useCachedGrammarInParse ( false );
00497
00498 try
00499 {
00500 parser->parse( filePath.c_str() );
00501
00502
00503 theDocument_ = parser->adoptDocument();
00504
00505
00506 rootElement_ = theDocument_->getDocumentElement();
00507 if( !rootElement_ )
00508 throw(std::runtime_error( "empty XML theDocument_" ));
00509
00510 recursiveFixTextFields(rootElement_);
00511
00512 xercesc::DOMNodeList *nodeList = theDocument_->getElementsByTagName(CONVERT_TO_XML(headerTagName_));
00513 if(nodeList->getLength())
00514 headerElement_ = (xercesc::DOMElement*)(theDocument_->getElementsByTagName(CONVERT_TO_XML(headerTagName_))->item(0));
00515 else
00516 headerElement_ = 0;
00517
00518 dataElement_ = (xercesc::DOMElement*)(theDocument_->getElementsByTagName(CONVERT_TO_XML(dataTagName_))->item(0));
00519 }
00520 catch( xercesc::XMLException& e )
00521 {
00522 std::cout << __COUT_HDR_FL__<< "Error parsing file." << std::endl;
00523 return false;
00524 }
00525 delete parser;
00526
00527 return true;
00528 }
00529
00530
00531
00532
00533 void HttpXmlDocument::recursiveFixTextFields(xercesc::DOMElement *currEl)
00534 {
00535 xercesc::DOMNodeList *nodeList = currEl->getChildNodes();
00536
00537
00538 for(unsigned int i = 0; i<nodeList->getLength();++i)
00539 if(nodeList->item(i)->getNodeType() == xercesc::DOMNode::TEXT_NODE)
00540 ((xercesc::DOMElement*)(nodeList->item(i)))->setTextContent(CONVERT_TO_XML(
00541 escapeString(XML_TO_CHAR(((xercesc::DOMElement*)(nodeList->item(i)))->getNodeValue()))));
00542 else
00543 recursiveFixTextFields ((xercesc::DOMElement*)(nodeList->item(i)));
00544 }
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601