ATLAS Offline Software
Loading...
Searching...
No Matches
ReadFromXmlDom.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3*/
4//
5// ReadFromXmlDom.cxx
6// HDef
7//
8// Created by sroe on 15/03/2016.
9//
10
11#include "ReadFromXmlDom.h"
12#include "xmlUtilities.h"
13#include "XincludeErrHandler.h"
14#include <xercesc/parsers/AbstractDOMParser.hpp>
15#include <xercesc/dom/DOM.hpp>
16#include <sys/stat.h>
18#include <array>
19#include <algorithm>
20#include <cmath>
21#include <regex>
22
24ATLAS_NO_CHECK_FILE_THREAD_SAFETY; // Not sure if usage of Xerces-C++ here is thread safe.
25// The following warning message is given if checked:
26// warning: Use of static expression 'xercesc_3_1::XMLPlatformUtils::fgMemoryManager'
27// of type 'xercesc_3_1::MemoryManager*' within function
28// 'toNative(const XMLCh*)::<lambda(char*)>' may not be thread-safe.
29//
30// https://xerces.apache.org/xerces-c/faq-parse-3.html#faq-6
31// Is Xerces-C++ thread-safe?
32// The answer is yes if you observe the following rules for using Xerces-C++
33// in a multi-threaded environment:
34// ... ...
35
namespace {
  /// Tell whether a proposed axis name is one of the three permitted names.
  /// @param proposedName  candidate axis name
  /// @param allowedNames  the three names that are accepted
  /// @return true if proposedName compares equal to any entry of allowedNames
  bool
  validAxisName(const std::string& proposedName, const std::array<std::string, 3>& allowedNames) {
    const auto match = std::find(allowedNames.begin(), allowedNames.end(), proposedName);
    return match != allowedNames.end();
  }
}
42
43ReadFromXmlDom::ReadFromXmlDom() : m_source("unspecified file"), m_format("text/xml") {
44}
45
47 m_source (PathResolver::find_file(source, "DATAPATH")),
48 m_format("text/xml")
49{
50}
51
52std::string
54 return m_source;
55}
56
57std::string
59 return m_format;
60}
61
62bool
63ReadFromXmlDom::histoDefinitionMap(std::map<std::string, SingleHistogramDefinition>& usersmap) const {
64 bool ok(true);
65
66 for (const auto& i:m_vectorOfDefinitions) {
67 if (i.empty()) {
68 continue;
69 }
70 if (not i.validType()) {
71 continue;
72 }
73 bool thisIsOk = (usersmap.insert(std::pair<std::string, SingleHistogramDefinition>(i.stringIndex(), i))).second;
74 if (not thisIsOk) {
75 ok &= thisIsOk;
76 std::string msg = "You have attempted to add a duplicate histogram definition: " + i.stringIndex();
77 throw std::runtime_error(msg);
78 }
79 }
80 return(ok and(not usersmap.empty()));
81}
82
83bool
85 bool ok(true);
86
87 m_vectorOfDefinitions.push_back(oneDefinition);
88 return ok;
89}
90
// Parse the XML file named by m_source and store one histogram definition for
// every element with tag name "h" found in the document.
// Returns false (after printing a message to std::cerr) when m_source is empty
// or sourceExists() fails; otherwise returns true, since `ok` is never modified.
91bool
93 bool ok(true);
94
95 if (m_source.empty() or(not sourceExists())) {
96 std::cerr << "Could not open file " << m_source << " in ReadFromXmlDom initialize" << std::endl;
97 return false;
98 }
99 myXerces::Lib xercesFrame; // RAII xerces context
// "LS" is the feature string used to look up the DOM implementation.
100 static const XMLCh gLS[] = {
101 xercesc::chLatin_L, xercesc::chLatin_S, xercesc::chNull
102 };
103 xercesc::DOMImplementation* impl = xercesc::DOMImplementationRegistry::getDOMImplementation(gLS);
104 xercesc::DOMLSParser* parser = static_cast<xercesc::DOMImplementationLS*> (impl)->createLSParser(
105 xercesc::DOMImplementationLS::MODE_SYNCHRONOUS, nullptr);
106 xercesc::DOMConfiguration* config = parser->getDomConfig();
// Switch on XInclude processing only when the configuration accepts that parameter.
107 if (config->canSetParameter(xercesc::XMLUni::fgXercesDoXInclude, true)) {
108 config->setParameter(xercesc::XMLUni::fgXercesDoXInclude, true);
109 }
111 config->setParameter(xercesc::XMLUni::fgDOMErrorHandler, &errorHandler);
// NOTE(review): `doc` is dereferenced two statements below without a null check —
// confirm parseURI cannot return nullptr for an unparsable file.
112 auto *doc = parser->parseURI(m_source.c_str());
113 const XercesString temp = fromNative("h");
114 xercesc::DOMNodeList* list = doc->getElementsByTagName(temp.c_str());
115 const auto nElements = list->getLength();
116 for (unsigned long i(0); i != nElements; ++i) {
117 xercesc::DOMNode const* thisNode = list->item(i);
// Nodes that are not DOMElement instances are ignored.
118 const auto *thisElement = dynamic_cast<xercesc::DOMElement const*> (thisNode);
119 if (thisElement) {
120 insertDefinition(parseXmlElement(thisElement));
121 }
122 }
// NOTE(review): this release() is not reached if anything above throws
// (parseXmlElement calls std::stoul / std::stof) — confirm whether that matters.
123 parser->release();
124 return ok;
125}
126
127bool
129 struct stat buffer{};
130
131 return(stat(m_source.c_str(), &buffer) == 0);
132}
133
// Build a histogram definition from one XML element.
// Two layouts are handled:
//  - an element without child elements but with text content, which is handed
//    to parseTProfileText (type "TProfile") or parseTextLine (any other type);
//  - an element with two or three child elements named "x", "y", "z", each
//    carrying "n", "lo", "hi" and "title" attributes.
// In every other case the function returns `s`.
// NOTE(review): `s` is declared outside the lines shown here — presumably a
// default-constructed definition; confirm.
135ReadFromXmlDom::parseXmlElement(const xercesc::DOMElement* element) {
// Indices into the attribute-name and attribute-value arrays below.
137 enum RegXHistoGroups {
138 TYPE, NAME, TITLE, NX, NY, NZ, XLO, YLO, ZLO, XHI, YHI, ZHI, XAXIS, YAXIS, ZAXIS, FOLDER, NGROUPS
139 };
// Attribute name to query for each index; the per-axis entries repeat because
// the same attribute name is read from each axis element.
140 const std::array<std::string, NGROUPS> attrNames = {
141 "type", "id", "title", "n", "n", "n", "lo", "lo", "lo", "hi", "hi", "hi", "title", "title", "title", "folder"
142 };
143 //
144 // transform the std::string attribute names to Xerces string attribute names
145 std::array<XercesString, NGROUPS> xercesNames;
146 std::transform(attrNames.begin(), attrNames.end(), xercesNames.begin(), [](const std::string& s) {
147 return fromNative(s);
148 });
149 // Use this array to store the primary returned attribute values, which will be Xerces strings
150 std::array<XercesString, NGROUPS> xercesValues;
151 //
152 constexpr unsigned int NAXES = 3; // allow only three axes, could be extended later
153 const std::array<std::string, NAXES> allowedAxisNames = {
154 "x", "y", "z"
155 };
156
157 //
// Attributes read from the histogram element itself.
158 xercesValues[NAME] = element->getAttribute(xercesNames[NAME].c_str());
159 xercesValues[TITLE] = element->getAttribute(xercesNames[TITLE].c_str());
160 xercesValues[TYPE] = element->getAttribute(xercesNames[TYPE].c_str());
161 xercesValues[FOLDER] = element->getAttribute(xercesNames[FOLDER].c_str());
162 const std::string type = toNative(xercesValues[TYPE]);
163 const bool isTProfile = (type == "TProfile");
164 // get children of the histogram, these are the two axes
165 const xercesc::DOMElement* axisDef0 = element->getFirstElementChild();
// No child elements but some child nodes: treat the text content as a one-line definition.
166 if (!axisDef0 and element->hasChildNodes()) {
167 XercesString xercesContent = element->getTextContent();
168 const std::string textContent = toNative(xercesContent);
169 if (textContent.empty()) {
170 return s;
171 }
172 SingleHistogramDefinition sx = isTProfile ? parseTProfileText(textContent) : parseTextLine(textContent);
173 sx.name = toNative(xercesValues[NAME]);
// NOTE(review): `type` is const, so this std::move still performs a copy.
174 sx.histoType = std::move(type);
175 return sx;
176 }
177 // if we get here and axisDef0 is null, there's a problem
178 if (not axisDef0) return s;
179 const xercesc::DOMElement* axisDef1 = axisDef0->getNextElementSibling();
180 if (not axisDef1) return s; //no y axis, abort
181 // but could be ordered x-y or y-x. The following assumes x, y, z ordering for now.
182 std::string axisName0 = toNative(axisDef0->getTagName());
183 std::string axisName1 = toNative(axisDef1->getTagName());
184 const xercesc::DOMElement* axisDef2 = axisDef1->getNextElementSibling();
// A missing third child is treated as if it were named "z".
185 std::string axisName2 = axisDef2 ? toNative(axisDef2->getTagName()) : "z";
// NOTE(review): axisName2 is never compared with axisName0, so a sequence such
// as x, y, x passes this check — confirm whether that is intended.
186 if (validAxisName(axisName0, allowedAxisNames)
187 and validAxisName(axisName1,allowedAxisNames)
188 and validAxisName(axisName2,allowedAxisNames)
189 and (axisName1 != axisName0) and (axisName2 != axisName1)) {
190 // default order
191 unsigned int xIndex = 0, yIndex = 1, zIndex = 2;
// Only a leading "y" swaps the first two slots; the third child always fills the z slot.
192 if (axisName0 == allowedAxisNames[1]) {
193 // inverted order
194 xIndex = 1;
195 yIndex = 0;
196 }
197 xercesValues[NX + xIndex] = axisDef0->getAttribute(xercesNames[NX + xIndex].c_str());
198 xercesValues[NX + yIndex] = axisDef1->getAttribute(xercesNames[NX + yIndex].c_str());
199 xercesValues[NX + zIndex] = axisDef2 ? axisDef2->getAttribute(xercesNames[NX + zIndex].c_str()) : XercesString();
200 xercesValues[XLO + xIndex] = axisDef0->getAttribute(xercesNames[XLO + xIndex].c_str());
201 xercesValues[XLO + yIndex] = axisDef1->getAttribute(xercesNames[XLO + yIndex].c_str());
202 xercesValues[XLO + zIndex] = axisDef2 ? axisDef2->getAttribute(xercesNames[XLO + zIndex].c_str()) : XercesString();
203 xercesValues[XHI + xIndex] = axisDef0->getAttribute(xercesNames[XHI + xIndex].c_str());
204 xercesValues[XHI + yIndex] = axisDef1->getAttribute(xercesNames[XHI + yIndex].c_str());
205 xercesValues[XHI + zIndex] = axisDef2 ? axisDef2->getAttribute(xercesNames[XHI + zIndex].c_str()) : XercesString();
206 xercesValues[XAXIS + xIndex] = axisDef0->getAttribute(xercesNames[XAXIS + xIndex].c_str());
207 xercesValues[XAXIS + yIndex] = axisDef1->getAttribute(xercesNames[XAXIS + yIndex].c_str());
208 xercesValues[XAXIS + zIndex] = axisDef2 ? axisDef2->getAttribute(xercesNames[XAXIS + zIndex].c_str()) : XercesString();
209 // transform Xerces strings to normal std::string
210 std::array<std::string, NGROUPS> stringValues {
211 ""
212 };
213 std::transform(xercesValues.begin(), xercesValues.end(), stringValues.begin(), [](const XercesString& s) {
214 return toNative(s);
215 });
216 // numerical values are required for some quantities
// Empty attribute text maps to 0 bins or NaN limits.
// NOTE(review): std::stoul / std::stof throw on malformed text and nothing here catches that.
217 const float NaN = std::nanf(""); // default 'invalid' float is not-a-number (NaN)
218 const unsigned int nx = stringValues[NX].empty() ? 0 : (unsigned int) (std::stoul(stringValues[NX]));
219 const unsigned int ny = stringValues[NY].empty() ? 0 : (unsigned int) (std::stoul(stringValues[NY]));
220 const unsigned int nz = stringValues[NZ].empty() ? 0 : (unsigned int) (std::stoul(stringValues[NZ]));
221 const float xlo = stringValues[XLO].empty() ? NaN : std::stof(stringValues[XLO]);
222 const float ylo = stringValues[YLO].empty() ? NaN : std::stof(stringValues[YLO]);
223 const float zlo = stringValues[ZLO].empty() ? NaN : std::stof(stringValues[ZLO]);
224 const float xhi = stringValues[XHI].empty() ? NaN : std::stof(stringValues[XHI]);
225 const float yhi = stringValues[YHI].empty() ? NaN : std::stof(stringValues[YHI]);
226 const float zhi = stringValues[ZHI].empty() ? NaN : std::stof(stringValues[ZHI]);
227 // now build the histogram definition to return
228 SingleHistogramDefinition sx(stringValues[NAME], stringValues[TYPE], stringValues[TITLE],
229 nx, ny, nz, xlo, xhi, ylo, yhi, zlo, zhi,
230 stringValues[XAXIS], stringValues[YAXIS], stringValues[ZAXIS],
231 stringValues[FOLDER]);
232 return sx;
233 }
// Axis names were invalid or repeated: fall back to `s`.
234 return s;
235}
236
// Parse a one-line text definition of the form
//   "title" nbins xlo xhi "x-axis title" "y-axis title" [trailing text]
// The pattern requires at least one whitespace character before the opening quote.
// On a match, title, xTitle, yTitle, xAxis and nBinsX of `s` are filled.
// NOTE(review): `s` is declared outside the lines shown here — confirm its initial state.
238ReadFromXmlDom::parseTextLine(const std::string& line) {
// Capture-group indices; TOTAL (0) is the whole match.
240 enum RegXHistoGroups {
241 TOTAL, TITLE, NBINS, XLO, XHI, XAXIS, YAXIS, DUMMY, FOLDER, NGROUPS
242 };
243
244 std::string rex =
245 R"delim(^\s+"([^"]+)"\s+(\d+)\s+([-+.0-9eE]+)\s+([-+.0-9eE]+)\s+"([^"]+)"\s+"([^"]+)"\s*(.*)\s*$)delim";
246 std::regex reg(rex);
247 std::smatch m;
248
249 if (std::regex_match(line, m, reg)) {
// NOTE(review): m.size() is the number of capture groups plus one, i.e. 8 for
// this pattern, while NGROUPS is 9 — so hasFolder looks to be always false and
// the trailing text (group DUMMY) is never stored as the folder. Confirm intent.
250 const bool hasFolder = (m.size() == NGROUPS);
251 s.title = m[TITLE].str();
252 s.xTitle = m[XAXIS].str();
253 s.yTitle = m[YAXIS].str();
254 s.xAxis = IHistogramDefinitionSvc::axesLimits_t(std::stof(m[XLO].str()), std::stof(m[XHI].str()));
255 s.nBinsX = std::stoi(m[NBINS].str());
256 if (hasFolder) {
257 s.folder = m[FOLDER].str();
258 }
259 }
// NOTE(review): m_empty is cleared even when the line did not match the pattern.
260 s.m_empty = false;
261 return s;
262}
263
// Parse a one-line TProfile text definition of the form
//   "title" nbins xlo xhi ylo yhi "x-axis title" "y-axis title" [trailing text]
// The pattern requires at least one whitespace character before the opening quote.
// On a match, title, xTitle, yTitle, xAxis, yAxis and nBinsX of `s` are filled.
// NOTE(review): `s` is declared outside the lines shown here — confirm its initial state.
265ReadFromXmlDom::parseTProfileText(const std::string& line) {
// Capture-group indices; TOTAL (0) is the whole match.
267 enum RegXHistoGroups {
268 TOTAL, TITLE, NBINS, XLO, XHI, YLO, YHI, XAXIS, YAXIS, DUMMY, FOLDER, NGROUPS
269 };
270
271 // text like: "Test of TProfile" 20 -50 50 0 200 "#eta" "testEntries"
272 std::string rex =
273 R"delim(^\s+"([^"]+)"\s+(\d+)\s+([-+.0-9eE]+)\s+([-+.0-9eE]+)\s+([-+.0-9eE]+)\s+([-+.0-9eE]+)\s+"([^"]+)"\s+"([^"]+)"\s*(.*)\s*$)delim";
274 std::regex reg(rex);
275 std::smatch m;
276
277 if (std::regex_match(line, m, reg)) {
// NOTE(review): m.size() is the number of capture groups plus one, i.e. 10 for
// this pattern, while NGROUPS is 11 — so hasFolder looks to be always false and
// the trailing text (group DUMMY) is never stored as the folder. Confirm intent.
278 const bool hasFolder = (m.size() == NGROUPS);
279 s.title = m[TITLE].str();
280 s.xTitle = m[XAXIS].str();
281 s.yTitle = m[YAXIS].str();
282 s.xAxis = IHistogramDefinitionSvc::axesLimits_t(std::stof(m[XLO].str()), std::stof(m[XHI].str()));
283 s.yAxis = IHistogramDefinitionSvc::axesLimits_t(std::stof(m[YLO].str()), std::stof(m[YHI].str()));
284 s.nBinsX = std::stoi(m[NBINS].str());
285 if (hasFolder) {
286 s.folder = m[FOLDER].str();
287 }
288 }
// NOTE(review): m_empty is cleared even when the line did not match the pattern.
289 s.m_empty = false;
290 return s;
291}
static void errorHandler()
#define TYPE(CODE, TYP, IOTYP)
Define macros for attributes used to control the static checker.
#define ATLAS_NO_CHECK_FILE_THREAD_SAFETY
std::pair< float, float > axesLimits_t
typedef for axes limits, (lower bound, upper bound)
const std::string m_format
bool insertDefinition(const SingleHistogramDefinition &oneDefinition)
static SingleHistogramDefinition parseXmlElement(const xercesc::DOMElement *element)
std::string format() const final
static SingleHistogramDefinition parseTProfileText(const std::string &line)
bool sourceExists() const final
bool histoDefinitionMap(std::map< std::string, SingleHistogramDefinition > &usersMap) const final
std::string source() const final
std::vector< SingleHistogramDefinition > m_vectorOfDefinitions
static SingleHistogramDefinition parseTextLine(const std::string &line)
std::string m_source
bool initialize() final
Almost-a-struct for holding the single histogram definition.
MsgStream & msg
Definition testRead.cxx:32
#define NBINS
Definition windows.h:9
XercesString fromNative(const char *str)
std::basic_string< XMLCh > XercesString
std::string toNative(const XMLCh *str)