ATLAS Offline Software
Loading...
Searching...
No Matches
ColumnarPhysliteTest.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3*/
4
6
7//
8// includes
9//
10
12
13#include <AsgTesting/UnitTest.h>
32
37
38#ifndef XAOD_STANDALONE
40#endif
41
42#include <TFile.h>
43#include <TLeaf.h>
44#include <TTree.h>
45
46#include "ROOT/RNTuple.hxx"
47#include "ROOT/RNTupleInspector.hxx"
48#include "ROOT/RNTupleReader.hxx"
49#include <ROOT/RNTupleView.hxx>
50
51#include <boost/core/demangle.hpp>
52
53#include <algorithm>
54#include <chrono>
55#include <cstdint>
56#include <format>
57#include <memory>
58#include <span>
59#include <vector>
60
61#include <gtest/gtest.h>
62
63//
64// method implementations
65//
66
67namespace columnar
68{
69 // I'm moving code to this namespace but some of the code in this file
70 // is still just in the columnar namespace. As I evolve the code I'll
71 // move more of it to TestUtils.
72 using namespace TestUtils;
73
74 namespace TestUtils
75 {
76
78 ROOT::RNTupleReader* reader = nullptr;
79 ROOT::Experimental::RNTupleInspector* inspector = nullptr;
80 };
81
82 using Backend = std::variant<TTree*, RNTupleBackend*>;
83 template <typename T>
84 class BranchReader final
85 {
86 std::string m_branchName;
87 TBranch* m_branch = nullptr;
88 bool m_isStatic = std::is_pod_v<T>;
89 T* m_data{new T()};
90
91 public:
92 BranchReader(const std::string& val_branchName)
93 : m_branchName(val_branchName)
94 {
95 if (m_branchName.find("Aux.") != std::string::npos)
96 m_isStatic = true;
97 }
98
99 ~BranchReader() noexcept
100 {
101 delete m_data;
102 }
103
104 BranchReader(const BranchReader&) = delete;
106
107 void setIsStatic(bool isStatic)
108 {
109 m_isStatic = isStatic;
110 }
111
112 [[nodiscard]] const std::string& branchName() const
113 {
114 return m_branchName;
115 }
116
117 [[nodiscard]] std::string columnName() const
118 {
119 std::string columnName = m_branchName;
120 if (auto index = columnName.find("AuxDyn."); index != std::string::npos)
121 columnName.replace(index, 6, "");
122 else if (auto index = columnName.find("Aux."); index != std::string::npos)
123 columnName.replace(index, 3, "");
124 else if (columnName.find(".") != std::string::npos)
125 throw std::runtime_error("branch name does not contain AuxDyn or Aux: " +m_branchName);
126 return columnName;
127 }
128
129 [[nodiscard]] std::string containerName() const
130 {
131 if (auto index = m_branchName.find("AuxDyn."); index != std::string::npos)
132 return m_branchName.substr(0, index);
133 else if (auto index = m_branchName.find("Aux."); index != std::string::npos)
134 return m_branchName.substr(0, index);
135 else if (m_branchName.find(".") == std::string::npos)
136 return m_branchName;
137 else
138 throw std::runtime_error("branch name does not contain AuxDyn or Aux: " +m_branchName);
139 }
140
141 void connectTree(TTree* tree)
142 {
143 m_branch = tree->GetBranch(m_branchName.c_str());
144 if (!m_branch)
145 throw std::runtime_error("failed to get branch: " + m_branchName);
146 m_branch->SetMakeClass(1);
147 if (m_isStatic)
148 m_branch->SetAddress(m_data);
149 else
150 m_branch->SetAddress(&m_data);
151 }
152
153 void connectTree(const Backend& b)
154 {
155 auto* tree = std::get<TTree*>(b); // throws if wrong backend
157 }
158
159 const T& getEntry(Long64_t entry)
160 {
161 if (!m_branch)
162 throw std::runtime_error("branch not connected: " + m_branchName);
163 if (m_branch->GetEntry(entry) <= 0)
164 throw std::runtime_error("failed to get entry " + std::to_string(entry) + " for branch: " + m_branchName);
165 if (m_data == nullptr)
166 throw std::runtime_error("got nullptr reading data for branch: " + m_branchName);
167 return *m_data;
168 }
169
170 const T& getCachedEntry() const
171 {
172 return *m_data;
173 }
174
175 std::optional<float> entrySize() const
176 {
177 if (!m_branch)
178 return std::nullopt;
179 return static_cast<float>(m_branch->GetZipBytes()) / m_branch->GetEntries();
180 }
181
182 std::optional<float> uncompressedSize() const
183 {
184 if (!m_branch)
185 return std::nullopt;
186 return static_cast<float>(m_branch->GetTotBytes()) / m_branch->GetEntries();
187 }
188
189 // technically this is const-correct, but I don't want to convince
190 // the code checker of that
191 std::optional<unsigned> numBaskets()
192 {
193 if (!m_branch)
194 return std::nullopt;
195 return m_branch->GetListOfBaskets()->GetSize();
196 }
197 };
198
199 template <typename T>
200 class BranchReaderArray final
201 {
202 public:
203 std::string m_branchName;
204 TBranch* m_branch = nullptr;
205 std::vector<T> m_dataVec;
206
207 public:
208 BranchReaderArray(const std::string& val_branchName)
209 : m_branchName(val_branchName)
210 {}
211
214
215 [[nodiscard]] std::string columnName() const
216 {
217 std::string columnName = m_branchName;
218 if (auto index = columnName.find("AuxDyn."); index != std::string::npos)
219 columnName.replace(index, 6, "");
220 else if (auto index = columnName.find("Aux."); index != std::string::npos)
221 columnName.replace(index, 3, "");
222 else if (columnName.find(".") != std::string::npos)
223 throw std::runtime_error("branch name does not contain AuxDyn or Aux: " + m_branchName);
224 return columnName;
225 }
226
227 [[nodiscard]] std::string containerName() const
228 {
229 if (auto index = m_branchName.find("AuxDyn."); index != std::string::npos)
230 return m_branchName.substr(0, index);
231 else if (auto index = m_branchName.find("Aux."); index != std::string::npos)
232 return m_branchName.substr(0, index);
233 else if (m_branchName.find(".") == std::string::npos)
234 return m_branchName;
235 else
236 throw std::runtime_error("branch name does not contain AuxDyn or Aux: " + m_branchName);
237 }
238 void connectTree (TTree *tree)
239 {
240 m_branch = tree->GetBranch (m_branchName.c_str());
241 if (!m_branch)
242 throw std::runtime_error ("failed to get branch: " + m_branchName);
243 m_branch->SetMakeClass (1);
244 // FIX ME: I have to have some hard-coded size, see explanation
245 // below.
246 m_dataVec.resize (100);
247 if (!m_dataVec.empty())
248 m_branch->SetAddress (m_dataVec.data());
249 }
250
251 std::span<const T> getEntry (Long64_t entry, std::size_t size)
252 {
253 if (!m_branch)
254 throw std::runtime_error ("branch not connected: " + m_branchName);
255 if (m_dataVec.size() < size)
256 {
257 // FIX ME: in one of the latest releases the repointing below
258 // breaks, and causes memory corruption. so I'm now
259 // preallocating and fail rather than reallocate, and the
260 // problem goes away. maybe it should be investigated at some
261 // point, but this is a test and I already spend a fair amount
262 // of time investigating this. the harm is that this test
263 // consumes a few hundreds bytes more in memory and we may have
264 // to occasionally increase the buffer size to cover all test
265 // files and branch lengths.
266 throw std::runtime_error ("requested size exceeds buffer size for branch: " + m_branchName);
267 // m_dataVec.resize (size);
268 // m_branch->SetAddress (m_dataVec.data());
269 }
270 if (size > 0 && m_branch->GetEntry (entry) <= 0)
271 throw std::runtime_error ("failed to get entry " + std::to_string (entry) + " for branch: " + m_branchName);
272 return std::span<const T>(m_dataVec.data(), size);
273 }
274
275 std::optional<float> entrySize () const
276 {
277 if (!m_branch)
278 return std::nullopt;
279 return static_cast<float>(m_branch->GetZipBytes()) / m_branch->GetEntries();
280 }
281
282 std::optional<float> uncompressedSize () const
283 {
284 if (!m_branch)
285 return std::nullopt;
286 return static_cast<float>(m_branch->GetTotBytes()) / m_branch->GetEntries();
287 }
288
289 // technically this is const-correct, but I don't want to convince
290 // the code checker of that
291 std::optional<unsigned> numBaskets ()
292 {
293 if (!m_branch)
294 return std::nullopt;
295 return m_branch->GetListOfBaskets()->GetSize();
296 }
297 };
298
300 {
303 public:
304
306
308 {
309 if (!m_unknownKeysAllowedTargets.empty())
310 {
311 std::cout << "found unknown keys for " << m_columnName << ":";
312 for (auto& [key, allowedSet] : m_unknownKeysAllowedTargets)
313 {
314 std::cout << " " << std::hex << key << std::dec << " (allowed targets:";
315 for (auto index : allowedSet)
316 std::cout << " " << m_targetNames.at(index);
317 std::cout << ")";
318 }
319 }
320 }
321
322 [[nodiscard]] std::vector<std::string> connect (const ColumnInfo& columnInfo, const std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& offsetColumns, const std::unordered_map<std::string,ColumnInfo>& requestedColumns)
323 {
324 m_columnName = columnInfo.name;
325 std::vector<std::string> keyColumnNames;
326 if (!columnInfo.soleLinkTargetName.empty())
327 {
328 addTarget (columnInfo.soleLinkTargetName, offsetColumns);
329 } else
330 {
331 for (auto& [requestedName, requestedInfo] : requestedColumns)
332 {
333 if (requestedInfo.keyColumnForVariantLink == m_columnName)
334 {
335 keyColumnNames.push_back (requestedName);
336 m_keysColumns.emplace_back();
337 for (const auto& targetName : requestedInfo.variantLinkTargetNames)
338 addTarget (targetName, offsetColumns);
339 }
340 }
341 if (m_keysColumns.empty())
342 throw std::runtime_error ("no key column found for variant link: " + m_columnName);
343 }
344 return keyColumnNames;
345 }
346
347 void clear ()
348 {
349 m_columnData.clear();
350 }
351
352 void checkOffsets (unsigned eventIndex)
353 {
354 for (std::size_t i = 0; i < m_targetNames.size(); ++ i)
355 {
356 auto& targetOffsetColumn = *m_targetOffsetColumns.at(i);
357 if (eventIndex + 1 >= targetOffsetColumn.size())
358 throw std::runtime_error ("target offset column not yet filled for: " + m_targetNames.at(i) + " when checking link column " + m_columnName);
359 }
360 }
361
362 template<typename T>
363 void addLink (const ElementLink<T>& element, unsigned eventIndex)
364 {
365 if (element.isDefault())
366 {
367 addEmptyLink();
368 return;
369 }
370
371 addSplitLink (element.index(), element.key(), eventIndex);
372 }
373
375 {
377 }
378
379 void addSplitLink (std::size_t linkIndex, SG::sgkey_t linkKey, unsigned eventIndex)
380 {
381 if (linkIndex == 0 && linkKey == 0)
382 {
383 addEmptyLink();
384 return;
385 }
386
387 unsigned targetIndex = 0u;
388 while (targetIndex < m_targetKeys.size() && m_targetKeys.at(targetIndex) != linkKey)
389 ++ targetIndex;
390
391 // We didn't find the key, so we try to figure out which of the
392 // targets it could be. The idea is that you wouldn't rely on
393 // this for real tests, but that you then go and fill in those
394 // keys in the central lookup table. It will always record and
395 // report, that means if there is a variant link with extra
396 // targets you didn't declare you will get a diagnostic. This
397 // may be overly cautious, but it gives an extra diagnostic if
398 // maybe you missed a target.
399 if (targetIndex == m_targetKeys.size())
400 {
401 if (!m_unknownKeysAllowedTargets.contains (linkKey))
402 {
403 auto& allowedSet = m_unknownKeysAllowedTargets[linkKey];
404 for (std::size_t i = 0; i < m_targetKeys.size(); ++ i)
405 {
406 if (m_targetKeys.at(i) == 0)
407 allowedSet.insert(i);
408 }
409 }
410 auto& allowedSet = m_unknownKeysAllowedTargets[linkKey];
411 for (auto iter = allowedSet.begin(); iter != allowedSet.end();)
412 {
413 auto index = *iter;
414 auto& targetOffsetColumn = *m_targetOffsetColumns.at(index);
415 if (eventIndex + 1 >= targetOffsetColumn.size())
416 throw std::runtime_error ("target offset column not yet filled for: " + m_targetNames.at(index));
417 if (targetOffsetColumn.at(eventIndex) + linkIndex >= targetOffsetColumn.at(eventIndex + 1))
418 iter = allowedSet.erase(iter);
419 else
420 ++ iter;
421 }
422 // Not quite sure whether it is safer to use or not use one of
423 // the targets from the allowed set in this case. In general
424 // tools are expected to handle invalid links gracefully,
425 // worst case they throw an exception when trying to access
426 // it. So what I came up with is that for variant links we
427 // assume it invalid, but for non-variant links the tool
428 // expects exactly one target and we either found it or throw
429 // an exception.
430 if (m_keysColumns.empty())
431 {
432 if (allowedSet.size() == 1 && m_targetKeys.at(*allowedSet.begin()) == 0 && m_unknownKeysAllowedTargets.size() == 1)
433 targetIndex = *allowedSet.begin();
434 else
435 {
436 std::ostringstream error;
437 error << "target key mismatch: read sgkey " << std::hex << linkKey << std::dec;
438 error << " for column " << m_columnName << " with element index " << linkIndex << " targeting " << m_targetNames.at(0);
439 if (m_targetKeys.at(0) != 0u)
440 {
441 error << ", expected sgkey " << std::hex << m_targetKeys.at(0) << std::dec;
442 } else if (m_unknownKeysAllowedTargets.size() > 1)
443 {
444 error << ", alternate key found for non-variant link:";
445 for (auto& [key, allowedSet] : m_unknownKeysAllowedTargets)
446 {
447 if (key != linkKey)
448 error << " " << std::hex << key << std::dec;
449 }
450 } else
451 {
452 error << ", no expected sgkey configured but the maximum allowed index for the target is " << m_targetOffsetColumns.at(0)->at(eventIndex + 1) - m_targetOffsetColumns.at(0)->at(eventIndex) - 1;
453 }
454 throw std::runtime_error (std::move (error).str());
455 }
456 }
457 }
458
459 if (targetIndex == m_targetKeys.size())
460 {
461 // this creates a link with an unknown key, which the user
462 // will ignore
463 m_columnData.push_back (CM::mergeLinkKeyIndex (0xff, linkIndex));
464 return;
465 }
466
467 auto& targetOffsetColumn = *m_targetOffsetColumns.at(targetIndex);
468 if (eventIndex + 1 >= targetOffsetColumn.size())
469 throw std::runtime_error ("target offset column not yet filled for: " + m_targetNames.at(targetIndex));
470 auto myLinkIndex = linkIndex + targetOffsetColumn.at(eventIndex);
471 if (myLinkIndex >= targetOffsetColumn.at(eventIndex + 1))
472 throw std::runtime_error ("index out of range for link: " + m_columnName + " with element index " + std::to_string(linkIndex) + " targeting " + m_targetNames.at(targetIndex) + " with offset " + std::to_string(targetOffsetColumn.at(eventIndex)) + " and next offset " + std::to_string(targetOffsetColumn.at(eventIndex + 1)));
473
474 m_columnData.push_back (CM::mergeLinkKeyIndex (targetIndex, myLinkIndex));
475 }
476
477 [[nodiscard]] std::size_t size () const noexcept
478 {
479 return m_columnData.size();
480 }
481
482 [[nodiscard]] const typename CM::LinkIndexType* data () const noexcept
483 {
484 return m_columnData.data();
485 }
486
487 [[nodiscard]] auto begin () const noexcept { return m_columnData.begin(); }
488 [[nodiscard]] auto end () const noexcept { return m_columnData.end(); }
489
490 [[nodiscard]] const std::vector<typename CM::LinkKeyType>& keysColumn (std::size_t index) const
491 {
492 return m_keysColumns.at(index);
493 }
494
495
496
499 private:
500
501 std::vector<typename CM::LinkIndexType> m_columnData;
502
503 std::string m_columnName;
504
505 std::vector<std::string> m_targetNames;
506 std::vector<SG::sgkey_t> m_targetKeys;
507 std::vector<const std::vector<ColumnarOffsetType>*> m_targetOffsetColumns;
508
509 // there can be multiple keys-columns, hence this is a vector of
510 // vectors. if this is empty, then it is a single-target link
511 std::vector<std::vector<typename CM::LinkKeyType>> m_keysColumns;
512
513 std::unordered_map<SG::sgkey_t,std::unordered_set<std::size_t>> m_unknownKeysAllowedTargets;
514
515
516
517 void addTarget (const std::string& name, const std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& offsetColumns)
518 {
519 unsigned targetIndex = 0;
520 while (targetIndex < m_targetNames.size() && m_targetNames.at(targetIndex) != name)
521 ++ targetIndex;
522 if (targetIndex == m_targetNames.size())
523 {
524 m_targetNames.push_back(name);
525 if (auto offsetIter = offsetColumns.find (name); offsetIter != offsetColumns.end())
526 m_targetOffsetColumns.push_back (offsetIter->second);
527 else
528 throw std::runtime_error ("missing offset column: " + name);
529 if (auto keyIter = knownKeys.find (name); keyIter != knownKeys.end())
530 m_targetKeys.push_back (keyIter->second);
531 else
532 m_targetKeys.push_back (0);
533 }
534 if (!m_keysColumns.empty())
535 m_keysColumns.back().push_back (targetIndex);
536 }
537 };
538
539 template <typename T>
540 class RNTFieldReader final
541 {
542 std::string m_FieldName;
543 std::unique_ptr<ROOT::RNTupleView<T>> m_view;
544 ROOT::Experimental::RNTupleInspector* m_inspector = nullptr;
545 ROOT::RNTupleReader* m_reader = nullptr;
546 const T* m_data = nullptr;
547
548 public:
549 RNTFieldReader(const std::string& val_fieldName)
550 : m_FieldName(val_fieldName)
551 {}
552
553 ~RNTFieldReader() noexcept {}
556
557 [[nodiscard]] const std::string& fieldName() const
558 {
559 return m_FieldName;
560 }
561
562 [[nodiscard]] std::string columnName() const
563 {
564 std::string columnName = m_FieldName;
565 if (auto index = columnName.find("AuxDyn:"); index != std::string::npos)
566 columnName.replace(index, 6, "");
567 else if (auto index = columnName.find("Aux:."); index != std::string::npos)
568 columnName.replace(index, 4, "");
569 else if (auto index = columnName.find("Aux:"); index != std::string::npos)
570 columnName.replace(index, 3, "");
571 else if (columnName.find(":") != std::string::npos)
572 throw std::runtime_error("field name does not contain AuxDyn or Aux: " + m_FieldName);
573 std::replace(columnName.begin(), columnName.end(), ':', '.');
574
575 return columnName;
576 }
577
578 [[nodiscard]] std::string containerName() const
579 {
580 if (auto index = m_FieldName.find("AuxDyn:"); index != std::string::npos)
581 return m_FieldName.substr(0, index);
582 else if (auto index = m_FieldName.find("Aux:"); index != std::string::npos)
583 return m_FieldName.substr(0, index);
584 else if (m_FieldName.find(":") == std::string::npos)
585 return m_FieldName;
586 else
587 throw std::runtime_error("field name does not contain AuxDyn or Aux: " + m_FieldName);
588 }
589
590 void connectRNTuple(ROOT::RNTupleReader* reader,
591 ROOT::Experimental::RNTupleInspector* inspector)
592 {
593 m_inspector = inspector;
594 m_reader = reader;
595 m_view = std::make_unique<ROOT::RNTupleView<T>>(reader->GetView<T>(m_FieldName));
596
597 if (!m_view)
598 throw std::runtime_error("failed to get field: " + m_FieldName);
599 }
600
601 void connectTree(const Backend& b)
602 {
603 auto* rntbackend = std::get<RNTupleBackend*>(b); // throws if wrong backend
604
605 if (!rntbackend->reader || !rntbackend->inspector)
606 throw std::runtime_error("RNTuple backend not properly initialized");
607 connectRNTuple(rntbackend->reader, rntbackend->inspector);
608 }
609
610
611 const T& getEntry(Long64_t entry)
612 {
613 if (!m_view)
614 throw std::runtime_error("field not connected: " + m_FieldName);
615
616 m_data = &((*m_view)(static_cast<ROOT::NTupleSize_t>(entry)));
617
618 if (m_data == nullptr)
619 throw std::runtime_error("got nullptr reading data for field: " + m_FieldName);
620 return *m_data;
621 }
622
623 const T& getCachedEntry() const
624 {
625 return *m_data;
626 }
627
628 std::optional<float> entrySize() const
629 {
630
631 const ROOT::Experimental::RNTupleInspector::RFieldTreeInspector& fieldTreeInspector = m_inspector->GetFieldTreeInspector(m_FieldName);
632 return static_cast<float>(fieldTreeInspector.GetCompressedSize()) /
633 m_inspector->GetDescriptor().GetNEntries();
634 }
635
636 std::optional<float> uncompressedSize() const
637 {
638
639 const ROOT::Experimental::RNTupleInspector::RFieldTreeInspector& fieldTreeInspector = m_inspector->GetFieldTreeInspector(m_FieldName);
640
641 return static_cast<float>(fieldTreeInspector.GetUncompressedSize()) /
642 m_inspector->GetDescriptor().GetNEntries();
643 }
644
645 std::optional<unsigned> numBaskets()
646 {
647 // placeholder
648 return std::nullopt;
649 }
650 };
651
653 {
654 public:
655
657 {
658 std::string name;
659 bool isOffset = false;
660 bool primary = false;
661 bool enabled = false;
663 };
664 std::vector<OutputColumnInfo> outputColumns;
665
666 virtual ~IColumnData () noexcept = default;
667
668 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) = 0;
669
672 {
673 for (auto& col : outputColumns)
674 {
675 if (col.enabled)
676 col.columnIndex = header.getColumnIndex (col.name);
677 }
678 }
679
680 virtual void clearColumns () = 0;
681
682 virtual void getEntry (Long64_t entry) = 0;
683
684 virtual void setData (ColumnVectorData& columnData) = 0;
685
686 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) = 0;
687
688 virtual void collectColumnData () = 0;
689 };
690
692 {
693 std::array<ColumnarOffsetType, 2> data = {0, 0};
694
696 {
697 outputColumns.push_back ({.name = eventRangeColumnName, .isOffset = true});
698 }
699
700 virtual bool connect(Backend /*source*/, std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& /*offsetColumns*/,std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
701 {
702 if (requestedColumns.contains(outputColumns.at(0).name))
703 {
704 requestedColumns.erase(outputColumns.at(0).name);
705 outputColumns.at(0).enabled = true;
706 return true;
707 }
708 return false;
709 }
710 virtual void clearColumns () override
711 {
712 data[0] = 0;
713 data[1] = 0;
714 }
715
716 virtual void getEntry (Long64_t /*entry*/) override
717 {
718 data[1] += 1;
719 }
720
721 virtual void setData (ColumnVectorData& columnData) override
722 {
723 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
724 columnData.setColumn (outputColumns.at(0).columnIndex, data.size(), data.data());
725 }
726
727 [[nodiscard]] virtual BranchPerfData getPerfData (float /*emptyTime*/) override
728 {
729 BranchPerfData result;
730 result.name = "EventCount(auto)";
731 return result;
732 }
733
734 virtual void collectColumnData () override
735 {}
736 };
737
738
739 template <typename T, template <typename> class Reader>
741 {
742 Reader<T> branchReader;
745 std::vector<T> outData;
746 unsigned entries = 0;
747
748 explicit ColumnDataScalar (const std::string& val_branchName)
749 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
750 {
751 outputColumns.push_back ({.name = branchReader.columnName()});
752 }
753
754 virtual bool connect( Backend source, std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& /*offsetColumns*/,std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
755 {
756 auto iter = requestedColumns.find (outputColumns.at(0).name);
757 if (iter == requestedColumns.end())
758 return false;
759 outputColumns.at(0).enabled = true;
760 requestedColumns.erase (iter);
761
762 branchReader.connectTree (source);
763
764 return true;
765 }
766 virtual void clearColumns () override
767 {
768 outData.clear ();
769 }
770
771 virtual void getEntry (Long64_t entry) override
772 {
773 benchmark.startTimer ();
774 const auto& branchData = branchReader.getEntry (entry);
775 benchmark.stopTimer ();
776 benchmarkUnpack.startTimer ();
777 outData.push_back (branchData);
778 benchmarkUnpack.stopTimer ();
779 }
780
781 virtual void setData (ColumnVectorData& columnData) override
782 {
783 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
784 columnData.setColumn (outputColumns.at(0).columnIndex, outData.size(), outData.data());
785 }
786
787 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
788 {
789 BranchPerfData result;
790 result.name = branchReader.columnName();
791 result.timeRead = benchmark.getEntryTime(emptyTime);
792 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
793 benchmark.setSilence();
794 benchmarkUnpack.setSilence();
795 result.entrySize = branchReader.entrySize();
796 result.uncompressedSize = branchReader.uncompressedSize();
797 result.numBaskets = branchReader.numBaskets();
798 result.entries = entries;
799 return result;
800 }
801
802 virtual void collectColumnData () override
803 {
804 entries += outData.size();
805 }
806 };
807
808 template <typename T, template <typename> class Reader>
810 {
811 Reader<std::vector<T>> branchReader;
812 const std::vector<ColumnarOffsetType>* offsetColumn = nullptr;
813 std::vector<ColumnarOffsetType> offsets = {0};
814 std::vector<T> outData;
817 unsigned entries = 0;
818
819 explicit ColumnDataVector (const std::string& val_branchName)
820 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
821 {
822 outputColumns.push_back ({.name = branchReader.columnName()});
823 outputColumns.push_back ({.name = branchReader.containerName(), .isOffset = true, .primary = false});
824 }
825
826 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
827 {
828 auto iter = requestedColumns.find (outputColumns.at(0).name);
829 if (iter == requestedColumns.end())
830 return false;
831 outputColumns.at(0).enabled = true;
832
833 branchReader.connectTree(source);
834
835 if (iter->second.offsetName != outputColumns.at(1).name)
836 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
837
838 requestedColumns.erase (iter);
839
840 if (auto offsetIter = offsetColumns.find (outputColumns.at(1).name); offsetIter != offsetColumns.end())
841 offsetColumn = offsetIter->second;
842 else
843 offsetColumns.emplace (outputColumns.at(1).name, &offsets);
844
845 iter = requestedColumns.find (outputColumns.at(1).name);
846 if (iter != requestedColumns.end())
847 {
848 requestedColumns.erase (iter);
849 outputColumns.at(1).enabled = true;
850 }
851
852 return true;
853 }
854
855 virtual void clearColumns () override
856 {
857 offsets.clear ();
858 offsets.push_back (0);
859 outData.clear ();
860 }
861
862 virtual void getEntry (Long64_t entry) override
863 {
864 benchmark.startTimer ();
865 const auto& branchData = branchReader.getEntry (entry);
866 benchmark.stopTimer ();
867 benchmarkUnpack.startTimer ();
868 outData.insert (outData.end(), branchData.begin(), branchData.end());
869 offsets.push_back (outData.size());
870 benchmarkUnpack.stopTimer ();
871 }
872
873 virtual void setData (ColumnVectorData& columnData) override
874 {
875 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
876 columnData.setColumn (outputColumns.at(0).columnIndex, outData.size(), outData.data());
877 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
878 columnData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
879 if (offsetColumn)
880 {
881 if (offsetColumn->size() != offsets.size())
882 throw std::runtime_error ("offset column not filled yet: " + outputColumns.at(1).name);
883 if (offsetColumn->back() != offsets.back())
884 throw std::runtime_error ("offset column does not match: " + outputColumns.at(1).name);
885 }
886 }
887
888 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
889 {
890 BranchPerfData result;
891 result.name = branchReader.columnName();
892 result.timeRead = benchmark.getEntryTime(emptyTime);
893 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
894 benchmark.setSilence();
895 benchmarkUnpack.setSilence();
896 result.entrySize = branchReader.entrySize();
897 result.uncompressedSize = branchReader.uncompressedSize();
898 result.numBaskets = branchReader.numBaskets();
899 result.entries = entries;
900 return result;
901 }
902
903 virtual void collectColumnData () override
904 {
905 entries += outData.size();
906 }
907 };
908
909 template <typename T, template <typename> class Reader>
911 {
913 const std::vector<ColumnarOffsetType>* offsetColumn = nullptr;
914 std::vector<T> outData;
915 unsigned entries = 0;
916
917 ColumnDataOutVector (const std::string& val_columnName, const T& val_defaultValue)
918 : defaultValue (val_defaultValue)
919 {
920 outputColumns.push_back ({.name = val_columnName});
921 }
922
923 virtual bool connect([[maybe_unused]]Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
924 {
925 auto iter = requestedColumns.find (outputColumns.at(0).name);
926 if (iter == requestedColumns.end())
927 return false;
928 outputColumns.at(0).enabled = true;
929
930 // WARNING: absolutely do not switch the next line to a
931 // reference, the pointed to element gets deleted below.
932 const auto offsetName = iter->second.offsetName;
933 if (offsetName.empty())
934 throw std::runtime_error ("missing offset column for: " + outputColumns.at(0).name);
935
936 requestedColumns.erase (iter);
937
938 if (auto offsetIter = offsetColumns.find (offsetName); offsetIter != offsetColumns.end())
939 offsetColumn = offsetIter->second;
940 else
941 throw std::runtime_error ("missing offset column for: " + outputColumns.at(0).name);
942 return true;
943 }
944
945 virtual void clearColumns () override
946 {
947 outData.clear ();
948 }
949
950 virtual void getEntry (Long64_t /*entry*/) override
951 {
952 outData.resize (offsetColumn->back(), defaultValue);
953 }
954
955 virtual void setData (ColumnVectorData& columnData) override
956 {
957 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
958 columnData.setColumn (outputColumns.at(0).columnIndex, outData.size(), outData.data());
959 }
960
961 [[nodiscard]] virtual BranchPerfData getPerfData (float /*emptyTime*/) override
962 {
963 BranchPerfData result;
964 result.name = outputColumns.at(0).name + "(out)";
965 result.entries = entries;
966 return result;
967 }
968
969 virtual void collectColumnData () override
970 {
971 entries += outData.size();
972 }
973 };
974
975 template <typename T, template <typename> class Reader>
977 {
978 Reader<std::vector<std::vector<T>>> branchReader;
979 std::vector<ColumnarOffsetType> offsets = {0};
980 std::vector<T> columnData;
983 unsigned entries = 0;
984
985 explicit ColumnDataVectorVector (const std::string& val_branchName)
986 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
987 {
988 outputColumns.push_back ({.name = branchReader.columnName() + ".data"});
989 outputColumns.push_back ({.name = branchReader.columnName() + ".offset", .isOffset = true});
990 }
991
992 virtual bool connect(Backend source, std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& /*offsetColumns*/, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
993 {
994 auto iter = requestedColumns.find (outputColumns.at(0).name);
995 if (iter == requestedColumns.end())
996 return false;
997 outputColumns.at(0).enabled = true;
998
999 branchReader.connectTree(source);
1000
1001 if (iter->second.offsetName != outputColumns.at(1).name)
1002 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1003
1004 requestedColumns.erase (iter);
1005
1006 iter = requestedColumns.find (outputColumns.at(1).name);
1007 if (iter == requestedColumns.end())
1008 return true;
1009 requestedColumns.erase (iter);
1010 outputColumns.at(1).enabled = true;
1011 return true;
1012 }
1013
1014 virtual void clearColumns () override
1015 {
1016 columnData.clear();
1017 offsets.clear();
1018 offsets.push_back (0);
1019 }
1020
1021 virtual void getEntry (Long64_t entry) override
1022 {
1023 benchmark.startTimer ();
1024 const auto& branchData = branchReader.getEntry (entry);
1025 benchmark.stopTimer ();
1026 benchmarkUnpack.startTimer ();
1027 for (auto& data : branchData)
1028 {
1029 columnData.insert (columnData.end(), data.begin(), data.end());
1030 offsets.push_back (columnData.size());
1031 }
1032 benchmarkUnpack.stopTimer ();
1033 }
1034
1035 virtual void setData (ColumnVectorData& colData) override
1036 {
1037 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1038 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1039 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1040 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1041 }
1042
1043 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1044 {
1045 BranchPerfData result;
1046 result.name = branchReader.columnName();
1047 result.timeRead = benchmark.getEntryTime(emptyTime);
1048 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1049 benchmark.setSilence();
1050 benchmarkUnpack.setSilence();
1051 result.entrySize = branchReader.entrySize();
1052 result.uncompressedSize = branchReader.uncompressedSize();
1053 result.numBaskets = branchReader.numBaskets();
1054 result.entries = entries;
1055 return result;
1056 }
1057
1058 virtual void collectColumnData () override
1059 {
1060 entries += columnData.size();
1061 }
1062 };
1063
1064 template <typename T, template <typename> class Reader>
1066 {
1068 Reader<std::vector<std::vector<ElementLink<T>>>> branchReader;
1069 std::vector<ColumnarOffsetType> offsets = {0};
1070 std::vector<ColumnarOffsetType> eventOffsets = {0};
1074 unsigned entries = 0;
1075 unsigned nullEntries = 0;
1076
1077 explicit ColumnDataVectorVectorLink (const std::string& val_branchName)
1078 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
1079 {
1080 outputColumns.push_back ({.name = branchReader.columnName() + ".data"});
1081 outputColumns.push_back ({.name = branchReader.columnName() + ".offset", .isOffset = true});
1082 }
1083
1084 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1085 {
1086 auto iter = requestedColumns.find (outputColumns.at(0).name);
1087 if (iter == requestedColumns.end())
1088 return false;
1089 outputColumns.at(0).enabled = true;
1090
1091 branchReader.connectTree(source);
1092
1093 if (iter->second.offsetName != outputColumns.at(1).name)
1094 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1095 for (auto keyColumn : columnData.connect (iter->second, offsetColumns, requestedColumns))
1096 {
1097 outputColumns.push_back ({.name = keyColumn, .primary = false, .enabled = true});
1098 requestedColumns.erase (keyColumn);
1099 }
1100
1101 requestedColumns.erase (iter);
1102
1103 iter = requestedColumns.find (outputColumns.at(1).name);
1104 if (iter == requestedColumns.end())
1105 return true;
1106 requestedColumns.erase (iter);
1107 outputColumns.at(1).enabled = true;
1108 return true;
1109 }
1110
1111 virtual void clearColumns () override
1112 {
1113 columnData.clear();
1114 offsets.clear();
1115 offsets.push_back (0);
1116 eventOffsets.clear();
1117 eventOffsets.push_back (0);
1118 }
1119
1120 virtual void getEntry (Long64_t entry) override
1121 {
1122 benchmark.startTimer ();
1123 const auto& branchData = branchReader.getEntry (entry);
1124 benchmark.stopTimer ();
1125 benchmarkUnpack.startTimer ();
1126 columnData.checkOffsets (eventOffsets.size() - 1);
1127 for (auto& data : branchData)
1128 {
1129 for (auto& element : data)
1130 {
1131 columnData.addLink (element, eventOffsets.size()-1);
1132 }
1133 offsets.push_back (columnData.size());
1134 }
1135 eventOffsets.push_back (offsets.size());
1136 benchmarkUnpack.stopTimer ();
1137 }
1138
1139 virtual void setData (ColumnVectorData& colData) override
1140 {
1141 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1142 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1143 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1144 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1145 for (std::size_t i = 2; i < outputColumns.size(); ++ i)
1146 {
1147 if (outputColumns.at(i).columnIndex != ColumnVectorHeader::nullIndex)
1148 colData.setColumn (outputColumns.at(i).columnIndex, columnData.keysColumn(i-2).size(), columnData.keysColumn(i-2).data());
1149 }
1150 }
1151
1152 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1153 {
1154 BranchPerfData result;
1155 result.name = branchReader.columnName();
1156 result.timeRead = benchmark.getEntryTime(emptyTime);
1157 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1158 benchmark.setSilence();
1159 benchmarkUnpack.setSilence();
1160 result.entrySize = branchReader.entrySize();
1161 result.uncompressedSize = branchReader.uncompressedSize();
1162 result.numBaskets = branchReader.numBaskets();
1163 result.entries = entries;
1164 result.nullEntries = nullEntries;
1165 return result;
1166 }
1167
1168 virtual void collectColumnData () override
1169 {
1170 entries += columnData.size();
1171 for (const auto& index : columnData)
1172 {
1174 nullEntries += 1;
1175 }
1176 }
1177 };
1178
1179 template <typename T, template <typename> class Reader>
1181 {
1182 std::string columnName;
1183 Reader<std::vector<std::vector<std::vector<T>>>> branchReader;
1184 std::vector<ColumnarOffsetType> outerOffsets = {0};
1185 std::vector<ColumnarOffsetType> innerOffsets = {0};
1186 std::vector<T> columnData;
1189 unsigned entries = 0;
1190
1191 explicit ColumnDataVectorVectorVector (const std::string& val_branchName)
1193 {
1194 outputColumns.push_back ({.name = branchReader.columnName() + ".data"});
1195 outputColumns.push_back ({.name = branchReader.columnName() + ".innerOffset", .isOffset = true});
1196 outputColumns.push_back ({.name = branchReader.columnName() + ".outerOffset", .isOffset = true});
1197 }
1198
1199 virtual bool connect(Backend source, std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& /*offsetColumns*/, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1200 {
1201 auto iter = requestedColumns.find (outputColumns.at(0).name);
1202 if (iter == requestedColumns.end())
1203 return false;
1204 outputColumns.at(0).enabled = true;
1205
1206 branchReader.connectTree(source);
1207
1208 if (iter->second.offsetName != outputColumns.at(1).name)
1209 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1210
1211 requestedColumns.erase (iter);
1212
1213 iter = requestedColumns.find (outputColumns.at(1).name);
1214 if (iter == requestedColumns.end())
1215 return true;
1216 outputColumns.at(1).enabled = true;
1217
1218 if (iter->second.offsetName != outputColumns.at(2).name)
1219 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(2).name);
1220
1221 requestedColumns.erase (iter);
1222
1223 iter = requestedColumns.find (outputColumns.at(2).name);
1224 if (iter == requestedColumns.end())
1225 return true;
1226 outputColumns.at(2).enabled = true;
1227 requestedColumns.erase (iter);
1228 return true;
1229 }
1230
1231 virtual void clearColumns () override
1232 {
1233 columnData.clear();
1234 innerOffsets.clear();
1235 innerOffsets.push_back (0);
1236 outerOffsets.clear();
1237 outerOffsets.push_back (0);
1238 }
1239
1240 virtual void getEntry (Long64_t entry) override
1241 {
1242 benchmark.startTimer ();
1243 const auto& branchData = branchReader.getEntry (entry);
1244 benchmark.stopTimer ();
1245 benchmarkUnpack.startTimer ();
1246 for (auto& outerData : branchData)
1247 {
1248 for (auto& innerData : outerData)
1249 {
1250 columnData.insert (columnData.end(), innerData.begin(), innerData.end());
1251 innerOffsets.push_back (columnData.size());
1252 }
1253 outerOffsets.push_back (innerOffsets.size()-1);
1254 }
1255 benchmarkUnpack.stopTimer ();
1256 }
1257
1258 virtual void setData (ColumnVectorData& colData) override
1259 {
1260 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1261 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1262 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1263 colData.setColumn (outputColumns.at(1).columnIndex, innerOffsets.size(), innerOffsets.data());
1264 if (outputColumns.at(2).columnIndex != ColumnVectorHeader::nullIndex)
1265 colData.setColumn (outputColumns.at(2).columnIndex, outerOffsets.size(), outerOffsets.data());
1266 }
1267
1268 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1269 {
1270 BranchPerfData result;
1271 result.name = branchReader.columnName();
1272 result.timeRead = benchmark.getEntryTime(emptyTime);
1273 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1274 benchmark.setSilence();
1275 benchmarkUnpack.setSilence();
1276 result.entrySize = branchReader.entrySize();
1277 result.uncompressedSize = branchReader.uncompressedSize();
1278 result.numBaskets = branchReader.numBaskets();
1279 result.entries = entries;
1280 return result;
1281 }
1282
1283 virtual void collectColumnData () override
1284 {
1285 entries += columnData.size();
1286 }
1287 };
1288
1289 template <typename T, template <typename> class Reader>
1291 {
1293 Reader<std::vector<ElementLink<T>>> branchReader;
1294 const std::vector<ColumnarOffsetType>* offsetColumn = nullptr;
1295 std::vector<ColumnarOffsetType> offsets = {0};
1299 unsigned entries = 0;
1300 unsigned nullEntries = 0;
1301
1302 ColumnDataVectorLink (const std::string& val_branchName)
1303 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
1304 {
1305 outputColumns.push_back ({.name = branchReader.columnName()});
1306 outputColumns.push_back ({.name = branchReader.containerName(), .isOffset = true, .primary = false});
1307 }
1308
1309
1310 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1311 {
1312 auto iter = requestedColumns.find (outputColumns.at(0).name);
1313 if (iter == requestedColumns.end())
1314 return false;
1315 outputColumns.at(0).enabled = true;
1316
1317 branchReader.connectTree(source);
1318
1319 if (iter->second.offsetName != outputColumns.at(1).name)
1320 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1321 for (auto keyColumn : columnData.connect (iter->second, offsetColumns, requestedColumns))
1322 {
1323 outputColumns.push_back ({.name = keyColumn, .primary = false, .enabled = true});
1324 requestedColumns.erase (keyColumn);
1325 }
1326
1327 requestedColumns.erase (iter);
1328
1329 if (auto offsetIter = offsetColumns.find (outputColumns.at(1).name); offsetIter != offsetColumns.end())
1330 offsetColumn = offsetIter->second;
1331 else
1332 offsetColumns.emplace (outputColumns.at(1).name, &offsets);
1333
1334 iter = requestedColumns.find (outputColumns.at(1).name);
1335 if (iter != requestedColumns.end())
1336 {
1337 outputColumns.at(1).enabled = true;
1338 requestedColumns.erase (iter);
1339 }
1340
1341 return true;
1342 }
1343
1344 virtual void clearColumns () override
1345 {
1346 columnData.clear();
1347 offsets.clear();
1348 offsets.push_back (0);
1349 }
1350
1351 virtual void getEntry (Long64_t entry) override
1352 {
1353 benchmark.startTimer ();
1354 const auto& branchData = branchReader.getEntry (entry);
1355 benchmark.stopTimer ();
1356 benchmarkUnpack.startTimer ();
1357 columnData.checkOffsets (offsets.size() - 1);
1358 for (auto& element : branchData)
1359 columnData.addLink (element, offsets.size()-1);
1360 offsets.push_back (columnData.size());
1361 if (offsetColumn)
1362 {
1363 if (offsetColumn->size() != offsets.size())
1364 throw std::runtime_error ("offset column not filled yet: " + outputColumns.at(1).name);
1365 if (offsetColumn->back() != offsets.back())
1366 throw std::runtime_error ("offset column does not match: " + outputColumns.at(1).name);
1367 }
1368 benchmarkUnpack.stopTimer ();
1369 }
1370
1371 virtual void setData (ColumnVectorData& colData) override
1372 {
1373 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1374 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1375 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1376 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1377 for (std::size_t i = 2; i < outputColumns.size(); ++ i)
1378 {
1379 if (outputColumns.at(i).columnIndex != ColumnVectorHeader::nullIndex)
1380 colData.setColumn (outputColumns.at(i).columnIndex, columnData.keysColumn(i-2).size(), columnData.keysColumn(i-2).data());
1381 }
1382 }
1383
1384 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1385 {
1386 BranchPerfData result;
1387 result.name = branchReader.columnName();
1388 result.timeRead = benchmark.getEntryTime(emptyTime);
1389 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1390 benchmark.setSilence();
1391 benchmarkUnpack.setSilence();
1392 result.entrySize = branchReader.entrySize();
1393 result.uncompressedSize = branchReader.uncompressedSize();
1394 result.numBaskets = branchReader.numBaskets();
1395 result.entries = entries;
1396 result.nullEntries = nullEntries;
1397 return result;
1398 }
1399
1400 virtual void collectColumnData () override
1401 {
1402 entries += columnData.size();
1403 for (const auto& index : columnData)
1404 {
1406 nullEntries += 1;
1407 }
1408 }
1409 };
1410
1411 template <typename T, template <typename> class Reader>
1413 {
1415 Reader<std::vector<ElementLink<T>>> branchReader;
1416 const std::vector<ColumnarOffsetType>* offsetColumn = nullptr;
1417 std::vector<ColumnarOffsetType> offsets = {0};
1421 unsigned entries = 0;
1422 unsigned nullEntries = 0;
1423
1424 ColumnDataVectorRLink(const std::string& val_branchName)
1425 : branchReader(val_branchName), benchmarkUnpack(branchReader.columnName() + "(unpack)"), benchmark(branchReader.columnName())
1426 {
1427 outputColumns.push_back({.name = branchReader.columnName()});
1428 outputColumns.push_back({.name = branchReader.containerName(), .isOffset = true, .primary = false});
1429 }
1430
1431 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1432 {
1433 auto iter = requestedColumns.find(outputColumns.at(0).name);
1434 if (iter == requestedColumns.end())
1435 return false;
1436 outputColumns.at(0).enabled = true;
1437
1438 branchReader.connectTree(source);
1439 if (iter->second.offsetName != outputColumns.at(1).name)
1440 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1441
1442 for (auto keyColumn : columnData.connect (iter->second, offsetColumns, requestedColumns))
1443 {
1444 outputColumns.push_back ({.name = keyColumn, .primary = false, .enabled = true});
1445 requestedColumns.erase (keyColumn);
1446 }
1447 requestedColumns.erase (iter);
1448
1449 if (auto offsetIter = offsetColumns.find (outputColumns.at(1).name); offsetIter != offsetColumns.end())
1450 offsetColumn = offsetIter->second;
1451 else
1452 offsetColumns.emplace (outputColumns.at(1).name, &offsets);
1453
1454 iter = requestedColumns.find (outputColumns.at(1).name);
1455 if (iter != requestedColumns.end())
1456 {
1457 outputColumns.at(1).enabled = true;
1458 requestedColumns.erase (iter);
1459 }
1460
1461 return true;
1462 }
1463
1464 virtual void clearColumns() override
1465 {
1466 columnData.clear();
1467 offsets.clear();
1468 offsets.push_back(0);
1469 }
1470
1471 virtual void getEntry(Long64_t entry) override
1472 {
1473 benchmark.startTimer();
1474 const auto& branchData = branchReader.getEntry(entry);
1475 benchmark.stopTimer();
1476 benchmarkUnpack.startTimer();
1477
1478 columnData.checkOffsets (offsets.size() - 1);
1479 for (const auto& element : branchData)
1480 {
1481 if (element.isDefault() || element.index() == static_cast<unsigned int>(-1))
1482 columnData.addEmptyLink ();
1483 else
1484 columnData.addSplitLink (element.index(), element.key(), offsets.size()-1);
1485 }
1486
1487
1488 offsets.push_back(columnData.size());
1489
1490 if (offsetColumn) {
1491 if (offsetColumn->size() != offsets.size())
1492 {
1493 throw std::runtime_error("offset column not filled yet: " + outputColumns.at(1).name);
1494 }
1495 if (offsetColumn->back() != offsets.back())
1496 {
1497 throw std::runtime_error("offset column does not match: " + outputColumns.at(1).name);
1498 }
1499 }
1500
1501 benchmarkUnpack.stopTimer();
1502 }
1503
1504 virtual void setData(ColumnVectorData& colData) override
1505 {
1506 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1507 colData.setColumn(outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1508 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1509 colData.setColumn(outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1510 for (std::size_t i = 2; i < outputColumns.size(); ++ i)
1511 {
1512 if (outputColumns.at(i).columnIndex != ColumnVectorHeader::nullIndex)
1513 colData.setColumn (outputColumns.at(i).columnIndex, columnData.keysColumn(i-2).size(), columnData.keysColumn(i-2).data());
1514 }
1515 }
1516
1517 [[nodiscard]] virtual BranchPerfData getPerfData(float emptyTime) override
1518 {
1519 BranchPerfData result;
1520 result.name = branchReader.columnName();
1521 result.timeRead = benchmark.getEntryTime(emptyTime);
1522 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1523 benchmark.setSilence();
1524 benchmarkUnpack.setSilence();
1525 result.entrySize = branchReader.entrySize();
1526 result.uncompressedSize = branchReader.uncompressedSize();
1527 result.numBaskets = branchReader.numBaskets();
1528 result.entries = entries;
1529 result.nullEntries = nullEntries;
1530 return result;
1531 }
1532
1533 virtual void collectColumnData() override
1534 {
1535 entries += columnData.size();
1536 for (const auto& index : columnData)
1537 {
1539 nullEntries += 1;
1540 }
1541 }
1542 };
1543
1544
1545 template<typename T>
1547 {
1552 const std::vector<ColumnarOffsetType>* offsetColumn = nullptr;
1553 std::vector<ColumnarOffsetType> offsets = {0};
1557 unsigned entries = 0;
1558 unsigned nullEntries = 0;
1559
1560 ColumnDataVectorSplitLink (const std::string& val_branchName)
1561 : branchReaderSize (val_branchName), branchReaderKey (val_branchName + ".m_persKey"), branchReaderIndex (val_branchName + ".m_persIndex"), benchmarkUnpack (branchReaderSize.columnName()+"(unpack)"), benchmark (branchReaderSize.columnName())
1562 {
1563 outputColumns.push_back ({.name = branchReaderSize.columnName()});
1564 outputColumns.push_back ({.name = branchReaderSize.containerName(), .isOffset = true, .primary = false});
1565 }
1566
1567 virtual bool connect (Backend source, std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string,ColumnInfo>& requestedColumns) override
1568 {
1569 auto iter = requestedColumns.find (outputColumns.at(0).name);
1570 if (iter == requestedColumns.end())
1571 return false;
1572 outputColumns.at(0).enabled = true;
1573 auto* tree = std::get<TTree*>(source);
1574 branchReaderSize.connectTree (tree);
1575 branchReaderKey.connectTree (tree);
1576 branchReaderIndex.connectTree (tree);
1577
1578 if (iter->second.offsetName != outputColumns.at(1).name)
1579 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1580
1581 for (auto keyColumn : columnData.connect (iter->second, offsetColumns, requestedColumns))
1582 {
1583 outputColumns.push_back ({.name = keyColumn, .primary = false, .enabled = true});
1584 requestedColumns.erase (keyColumn);
1585 }
1586 requestedColumns.erase (iter);
1587
1588 if (auto offsetIter = offsetColumns.find (outputColumns.at(1).name); offsetIter != offsetColumns.end())
1589 offsetColumn = offsetIter->second;
1590 else
1591 offsetColumns.emplace (outputColumns.at(1).name, &offsets);
1592
1593 iter = requestedColumns.find (outputColumns.at(1).name);
1594 if (iter != requestedColumns.end())
1595 {
1596 outputColumns.at(1).enabled = true;
1597 requestedColumns.erase (iter);
1598 }
1599
1600 return true;
1601 }
1602
1603 virtual void clearColumns () override
1604 {
1605 columnData.clear();
1606 offsets.clear();
1607 offsets.push_back (0);
1608 }
1609
1610 virtual void getEntry (Long64_t entry) override
1611 {
1612 benchmark.startTimer ();
1613 std::size_t branchDataSize = branchReaderSize.getEntry (entry);
1614 auto branchDataKey = branchReaderKey.getEntry (entry, branchDataSize);
1615 auto branchDataIndex = branchReaderIndex.getEntry (entry, branchDataSize);
1616 benchmark.stopTimer ();
1617 benchmarkUnpack.startTimer ();
1618 columnData.checkOffsets (offsets.size() - 1);
1619 for (std::size_t index = 0; index < branchDataSize; ++index)
1620 {
1621 if (branchDataIndex[index] == static_cast<UInt_t>(-1))
1622 columnData.addEmptyLink ();
1623 else
1624 columnData.addSplitLink (branchDataIndex[index], branchDataKey[index], offsets.size()-1);
1625 }
1626 offsets.push_back (columnData.size());
1627 if (offsetColumn)
1628 {
1629 if (offsetColumn->size() != offsets.size())
1630 throw std::runtime_error ("offset column not filled yet: " + outputColumns.at(1).name);
1631 if (offsetColumn->back() != offsets.back())
1632 throw std::runtime_error ("offset column does not match: " + outputColumns.at(1).name);
1633 }
1634 benchmarkUnpack.stopTimer ();
1635 }
1636
1637 virtual void setData (ColumnVectorData& colData) override
1638 {
1639 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1640 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1641 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1642 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1643 for (std::size_t i = 2; i < outputColumns.size(); ++ i)
1644 {
1645 if (outputColumns.at(i).columnIndex != ColumnVectorHeader::nullIndex)
1646 colData.setColumn (outputColumns.at(i).columnIndex, columnData.keysColumn(i-2).size(), columnData.keysColumn(i-2).data());
1647 }
1648 }
1649
1650 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1651 {
1652 BranchPerfData result;
1653 result.name = branchReaderSize.columnName();
1654 result.timeRead = benchmark.getEntryTime(emptyTime);
1655 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1656 benchmark.setSilence();
1657 benchmarkUnpack.setSilence();
1658 result.entrySize = branchReaderSize.entrySize().value() + branchReaderKey.entrySize().value() + branchReaderIndex.entrySize().value();
1659 result.uncompressedSize = branchReaderSize.uncompressedSize().value() + branchReaderKey.uncompressedSize().value() + branchReaderIndex.uncompressedSize().value();
1660 result.numBaskets = branchReaderSize.numBaskets().value() + branchReaderKey.numBaskets().value() + branchReaderIndex.numBaskets().value();
1661 result.entries = entries;
1662 result.nullEntries = nullEntries;
1663 return result;
1664 }
1665
1666 virtual void collectColumnData () override
1667 {
1668 entries += columnData.size();
1669 for (const auto& index : columnData)
1670 {
1672 nullEntries += 1;
1673 }
1674 }
1675 };
1676
1677 template <typename T, template <typename> class Reader>
1679 {
1681 Reader<std::vector<std::vector<ElementLink<T>>>> branchReader;
1682 std::vector<ColumnarOffsetType> offsets = {0};
1683 std::vector<ColumnarOffsetType> eventOffsets = {0};
1687 unsigned entries = 0;
1688 unsigned nullEntries = 0;
1689
1690 explicit ColumnDataVectorVectorVariantLink (const std::string& val_branchName)
1691 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
1692 {
1693 outputColumns.push_back ({.name = branchReader.columnName() + ".data"});
1694 outputColumns.push_back ({.name = branchReader.columnName() + ".offset", .isOffset = true});
1695 }
1696
1697 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1698 {
1699 auto iter = requestedColumns.find (outputColumns.at(0).name);
1700 if (iter == requestedColumns.end())
1701 return false;
1702 outputColumns.at(0).enabled = true;
1703
1704 branchReader.connectTree(source);
1705
1706 if (iter->second.offsetName != outputColumns.at(1).name)
1707 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1708
1709 for (auto keyColumn : columnData.connect (iter->second, offsetColumns, requestedColumns))
1710 {
1711 outputColumns.push_back ({.name = keyColumn, .primary = false, .enabled = true});
1712 requestedColumns.erase (keyColumn);
1713 }
1714
1715 requestedColumns.erase (iter);
1716
1717 iter = requestedColumns.find (outputColumns.at(1).name);
1718 if (iter != requestedColumns.end())
1719 {
1720 outputColumns.at(1).enabled = true;
1721 requestedColumns.erase (iter);
1722 }
1723
1724 iter = requestedColumns.find (outputColumns.at(2).name);
1725 if (iter != requestedColumns.end())
1726 {
1727 outputColumns.at(2).enabled = true;
1728 requestedColumns.erase (iter);
1729 }
1730 return true;
1731 }
1732
1733 virtual void clearColumns () override
1734 {
1735 columnData.clear();
1736 offsets.clear();
1737 offsets.push_back (0);
1738 eventOffsets.clear();
1739 eventOffsets.push_back (0);
1740 }
1741
1742 virtual void getEntry (Long64_t entry) override
1743 {
1744 benchmark.startTimer ();
1745 const auto& branchData = branchReader.getEntry (entry);
1746 benchmark.stopTimer ();
1747 benchmarkUnpack.startTimer ();
1748 columnData.checkOffsets (eventOffsets.size() - 1);
1749 for (auto& data : branchData)
1750 {
1751 for (auto& element : data)
1752 {
1753 columnData.addLink (element, eventOffsets.size()-1);
1754 }
1755 offsets.push_back (columnData.size());
1756 }
1757 eventOffsets.push_back (offsets.size());
1758 benchmarkUnpack.stopTimer ();
1759 }
1760
1761 virtual void setData (ColumnVectorData& colData) override
1762 {
1763 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1764 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1765 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1766 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1767 for (std::size_t i = 2; i < outputColumns.size(); ++ i)
1768 {
1769 if (outputColumns.at(i).columnIndex != ColumnVectorHeader::nullIndex)
1770 colData.setColumn (outputColumns.at(i).columnIndex, columnData.keysColumn(i-2).size(), columnData.keysColumn(i-2).data());
1771 }
1772 }
1773
1774 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1775 {
1776 BranchPerfData result;
1777 result.name = branchReader.columnName();
1778 result.timeRead = benchmark.getEntryTime(emptyTime);
1779 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1780 benchmark.setSilence();
1781 benchmarkUnpack.setSilence();
1782 result.entrySize = branchReader.entrySize();
1783 result.uncompressedSize = branchReader.uncompressedSize();
1784 result.numBaskets = branchReader.numBaskets();
1785 result.entries = entries;
1786 result.nullEntries = nullEntries;
1787 return result;
1788 }
1789
1790 virtual void collectColumnData () override
1791 {
1792 entries += columnData.size();
1793 for (const auto& index : columnData)
1794 {
1796 nullEntries += 1;
1797 }
1798 }
1799 };
1800
1801 template <template <typename> class Reader>
1803 {
1804 Reader<std::vector<std::string>> branchReader;
1805 std::vector<ColumnarOffsetType> offsets = {0};
1806 std::vector<char> columnData;
1807 std::vector<std::size_t> columnHashData;
1810
1811 ColumnDataMetNames (const std::string& val_branchName)
1812 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
1813 {
1814 outputColumns.push_back ({.name = branchReader.columnName() + ".data"});
1815 outputColumns.push_back ({.name = branchReader.columnName() + ".offset", .isOffset = true});
1816 outputColumns.push_back ({.name = branchReader.columnName() + "Hash"});
1817 }
1818
1819 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& /*offsetColumns*/, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1820 {
1821 auto iter = requestedColumns.find (outputColumns.at(0).name);
1822 if (iter == requestedColumns.end())
1823 return false;
1824 outputColumns.at(0).enabled = true;
1825
1826 branchReader.connectTree(source);
1827
1828 if (iter->second.offsetName != outputColumns.at(1).name)
1829 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1830
1831 requestedColumns.erase (iter);
1832
1833 iter = requestedColumns.find (outputColumns.at(1).name);
1834 if (iter == requestedColumns.end())
1835 {
1836 return true;
1837 }
1838 outputColumns.at(1).enabled = true;
1839 requestedColumns.erase (iter);
1840
1841 iter = requestedColumns.find (outputColumns.at(2).name);
1842 if (iter != requestedColumns.end())
1843 {
1844 outputColumns.at(2).enabled = true;
1845 requestedColumns.erase (iter);
1846 }
1847 return true;
1848 }
1849
1850 virtual void clearColumns () override
1851 {
1852 columnData.clear();
1853 offsets.clear();
1854 offsets.push_back (0);
1855 columnHashData.clear();
1856 }
1857
1858 virtual void getEntry (Long64_t entry) override
1859 {
1860 benchmark.startTimer ();
1861 const auto& branchData = branchReader.getEntry (entry);
1862 benchmark.stopTimer ();
1863 benchmarkUnpack.startTimer ();
1864 for (auto& data : branchData)
1865 {
1866 columnData.insert (columnData.end(), data.begin(), data.end());
1867 offsets.push_back (columnData.size());
1868 columnHashData.push_back (std::hash<std::string> () (data));
1869 }
1870 benchmarkUnpack.stopTimer ();
1871 }
1872
1873 virtual void setData (ColumnVectorData& colData) override
1874 {
1875 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1876 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1877 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1878 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1879 if (outputColumns.at(2).columnIndex != ColumnVectorHeader::nullIndex)
1880 colData.setColumn (outputColumns.at(2).columnIndex, columnHashData.size(), columnHashData.data());
1881 }
1882
1883 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1884 {
1885 BranchPerfData result;
1886 result.name = branchReader.columnName();
1887 result.timeRead = benchmark.getEntryTime(emptyTime);
1888 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1889 benchmark.setSilence();
1890 benchmarkUnpack.setSilence();
1891 result.entrySize = branchReader.entrySize();
1892 result.uncompressedSize = branchReader.uncompressedSize();
1893 result.numBaskets = branchReader.numBaskets();
1894 return result;
1895 }
1896
1897 virtual void collectColumnData () override
1898 {}
1899 };
1900
1901 template <template <typename> class Reader>
1903 {
1904 std::vector<std::string> termNames;
1905 const std::vector<ColumnarOffsetType>* offsetColumns = nullptr;
1906 std::vector<ColumnarOffsetType> offsets = {0};
1907 std::vector<ColumnarOffsetType> namesOffsets = {0};
1908 std::vector<char> namesData;
1909 std::vector<std::size_t> namesHash;
1910
1911 ColumnDataOutputMet (const std::string& val_columnName, std::vector<std::string> val_termNames)
1912 : termNames (std::move (val_termNames))
1913 {
1914 outputColumns.push_back ({.name = val_columnName, .isOffset = true});
1915 outputColumns.push_back ({.name = val_columnName + ".name.data"});
1916 outputColumns.push_back ({.name = val_columnName + ".name.offset", .isOffset = true});
1917 outputColumns.push_back ({.name = val_columnName + ".nameHash"});
1918 }
1919
1920 virtual bool connect([[maybe_unused]]Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1921 {
1922 if (auto iter = requestedColumns.find (outputColumns.at(0).name);
1923 iter != requestedColumns.end())
1924 requestedColumns.erase (iter);
1925 else
1926 return false;
1927 outputColumns.at(0).enabled = true;
1928
1929 if (auto iter = requestedColumns.find (outputColumns.at(1).name);
1930 iter != requestedColumns.end())
1931 {
1932 outputColumns.at(1).enabled = true;
1933 requestedColumns.erase (iter);
1934 }
1935
1936 if (auto iter = requestedColumns.find (outputColumns.at(2).name);
1937 iter != requestedColumns.end())
1938 {
1939 outputColumns.at(2).enabled = true;
1940 requestedColumns.erase (iter);
1941 }
1942
1943 if (auto iter = requestedColumns.find (outputColumns.at(3).name);
1944 iter != requestedColumns.end())
1945 {
1946 outputColumns.at(3).enabled = true;
1947 requestedColumns.erase (iter);
1948 }
1949
1950 // For multi-tool support, skip if offset column already registered
1951 if (auto offsetIter = offsetColumns.find (outputColumns.at(0).name); offsetIter == offsetColumns.end())
1952 offsetColumns.emplace (outputColumns.at(0).name, &offsets);
1953
1954 return true;
1955 }
1956
1957 virtual void clearColumns () override
1958 {
1959 offsets.clear ();
1960 offsets.push_back (0);
1961 namesData.clear ();
1962 namesOffsets.clear ();
1963 namesOffsets.push_back (0);
1964 namesHash.clear ();
1965 }
1966
1967 virtual void getEntry (Long64_t /*entry*/) override
1968 {
1969 for (const auto& termName : termNames)
1970 {
1971 namesData.insert (namesData.end(), termName.begin(), termName.end());
1972 namesOffsets.push_back (namesData.size());
1973 namesHash.push_back (std::hash<std::string> () (termName));
1974 }
1975 offsets.push_back (namesHash.size());
1976 }
1977
1978 virtual void setData (ColumnVectorData& colData) override
1979 {
1980 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1981 colData.setColumn (outputColumns.at(0).columnIndex, offsets.size(), offsets.data());
1982 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1983 colData.setColumn (outputColumns.at(1).columnIndex, namesData.size(), namesData.data());
1984 if (outputColumns.at(2).columnIndex != ColumnVectorHeader::nullIndex)
1985 colData.setColumn (outputColumns.at(2).columnIndex, namesOffsets.size(), namesOffsets.data());
1986 if (outputColumns.at(3).columnIndex != ColumnVectorHeader::nullIndex)
1987 colData.setColumn (outputColumns.at(3).columnIndex, namesHash.size(), namesHash.data());
1988 }
1989
1990 [[nodiscard]] virtual BranchPerfData getPerfData (float /*emptyTime*/) override
1991 {
1992 BranchPerfData result;
1993 result.name = outputColumns.at(0).name + "(met-out)";
1994 return result;
1995 }
1996
// No-op: this column type does nothing when column data is collected.
1997 virtual void collectColumnData () override
1998 {}
1999 };
2000
2001 template <template <typename> class Reader>
2003 {
2004 Reader<xAOD::CaloClusterContainer> branchReader;
2005 std::vector<ColumnarOffsetType> offsets = {0};
2006 std::vector<std::uint32_t> columnData;
2009 unsigned entries = 0;
2010
2011 ColumnDataSamplingPattern (const std::string& val_branchName)
2012 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+".samplingPattern(fallback)(unpack)"), benchmark (branchReader.columnName() + ".samplingPattern(fallback)")
2013 {
2014 outputColumns.push_back ({.name = branchReader.columnName() + ".samplingPattern"});
2015 outputColumns.push_back ({.name = branchReader.columnName(), .isOffset = true, .primary = false});
2016 }
2017
2018 virtual bool connect(Backend source, std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& /*offsetColumns*/, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
2019 {
2020 auto iter = requestedColumns.find (outputColumns.at(0).name);
2021 if (iter == requestedColumns.end())
2022 return false;
2023 outputColumns.at(0).enabled = true;
2024
2025 branchReader.connectTree(source);
2026 if (iter->second.offsetName != outputColumns.at(1).name)
2027 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
2028
2029 requestedColumns.erase (iter);
2030
2031 iter = requestedColumns.find (outputColumns.at(1).name);
2032 if (iter == requestedColumns.end())
2033 {
2034 return true;
2035 }
2036 outputColumns.at(1).enabled = true;
2037 requestedColumns.erase (iter);
2038 return true;
2039 }
2040
2041 virtual void clearColumns () override
2042 {
2043 columnData.clear();
2044 offsets.clear();
2045 offsets.push_back (0);
2046 }
2047
2048 virtual void getEntry (Long64_t entry) override
2049 {
2050 benchmark.startTimer ();
2051 const auto& branchData = branchReader.getEntry (entry);
2052 benchmark.stopTimer ();
2053 benchmarkUnpack.startTimer ();
2054 for (auto data : branchData)
2055 {
2056 columnData.push_back (data->samplingPattern());
2057 }
2058 offsets.push_back (columnData.size());
2059 benchmarkUnpack.stopTimer ();
2060 }
2061
2062 virtual void setData (ColumnVectorData& colData) override
2063 {
2064 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
2065 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
2066 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
2067 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
2068 }
2069
2070 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
2071 {
2072 BranchPerfData result;
2073 result.name = branchReader.columnName() + "(fallback)";
2074 result.timeRead = benchmark.getEntryTime(emptyTime);
2075 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
2076 benchmark.setSilence();
2077 benchmarkUnpack.setSilence();
2078 result.entrySize = branchReader.entrySize();
2079 result.uncompressedSize = branchReader.uncompressedSize();
2080 result.numBaskets = branchReader.numBaskets();
2081 result.entries = entries;
2082 return result;
2083 }
2084
// Add the number of values currently held in `columnData` to the running
// `entries` total that getPerfData() reports.
2085 virtual void collectColumnData () override
2086 {
2087 entries += columnData.size();
2088 }
2089 };
2090
2091 namespace
2092 {
// Per-tool state for one test definition: the tool viewed as a
// ColumnarTool<ColumnarModeArray>, its ToolColumnVectorMap wrapper, the
// repeat-call flags, and one benchmark per call.
2094 struct ToolData
2095 {
2096 std::string name;
2097 ColumnarTool<ColumnarModeArray>* tool = nullptr;
2098 std::unique_ptr<ToolColumnVectorMap> toolWrapper;
// copied from TestDefinition::noRepeatCall; suppresses the second call
2099 bool noRepeatCall = false;
// copied from UserConfiguration::runToolTwice; requests a second call
2100 bool runToolTwice = false;
2101
// timers for the first and the optional second call
2102 Benchmark benchmarkCall;
2103 Benchmark benchmarkCall2;
2104
// Build the state for the tool of `td`.
// Throws std::runtime_error if td.tool is not a
// ColumnarTool<ColumnarModeArray>.
2105 ToolData (const UserConfiguration& config, const TestDefinition& td,
2106 ColumnVectorHeader& columnHeader)
2107 : name (td.name)
2108 , noRepeatCall (td.noRepeatCall)
2109 , runToolTwice (config.runToolTwice)
2110 , benchmarkCall ("", config.batchSize)
2111 , benchmarkCall2 ("", config.batchSize)
2112 {
2113 tool = dynamic_cast<ColumnarTool<ColumnarModeArray>*>(td.tool);
2114 if (!tool)
2115 throw std::runtime_error ("tool is not a ColumnarTool<ColumnarModeArray>: " + td.name);
// NOTE(review): the embedded source numbering jumps from 2116 to 2118, so
// the statement guarded by this `if` appears to be missing from this
// listing; as shown, the wrapper creation below becomes the `if` body.
// Confirm against the original file.
2116 if (!td.containerRenames.empty())
2118 toolWrapper = std::make_unique<ToolColumnVectorMap> (columnHeader, *tool);
2119 }
2120
// Call the tool on `columnData` via callNoCheck under `benchmarkCall`; if
// `runToolTwice` is set and `noRepeatCall` is not, call it a second time
// under `benchmarkCall2`.
2122 void call (ColumnVectorData& columnData)
2123 {
2124 benchmarkCall.startTimer ();
2125 columnData.callNoCheck (*tool);
2126 benchmarkCall.stopTimer ();
2127 if (runToolTwice && !noRepeatCall)
2128 {
2129 benchmarkCall2.startTimer ();
2130 columnData.callNoCheck (*tool);
2131 benchmarkCall2.stopTimer ();
2132 }
2133 }
2134 };
2135 }
2136 }
2137
2138
2139
2140
2141 ColumnarPhysLiteTest ::
2142 ColumnarPhysLiteTest ()
2143 {
2144 static std::once_flag flag;
2145 std::call_once (flag, [] ()
2146 {
2147#ifdef XAOD_STANDALONE
2148 xAOD::Init().ignore();
2149#else
2150 POOL::Init();
2151#endif
2152 });
2153
2154 auto userConfiguration = TestUtils::UserConfiguration::fromEnvironment();
2155 if (userConfiguration.isrntuple)
2156 {
2157 auto* fileName = getenv("ASG_TEST_FILE_RNTUPLE_LITE_MC");
2158 if (fileName == nullptr)
2159 throw std::runtime_error("missing ASG_TEST_FILE_RNTUPLE_LITE_MC");
2160 rntreader = ROOT::RNTupleReader::Open("EventData", fileName);
2161 inspector = ROOT::Experimental::RNTupleInspector::Create("EventData", fileName);
2163 if (!rntreader or !inspector)
2164 throw std::runtime_error("failed to open rntuple");
2165 } else
2166 {
2167 auto* fileName = getenv("ASG_TEST_FILE_LITE_MC");
2168 if (fileName == nullptr)
2169 throw std::runtime_error("missing ASG_TEST_FILE_LITE_MC");
2170 file.reset(TFile::Open(fileName, "READ"));
2171 if (!file)
2172 throw std::runtime_error("failed to open file");
2173 tree = dynamic_cast<TTree*>(file->Get("CollectionTree"));
2174 if (!tree)
2175 throw std::runtime_error("failed to open rntuple");
2176 }
2177 }
2178
2179 ColumnarPhysLiteTest ::~ColumnarPhysLiteTest()
2180 {
2181 if (rntbackend)
2182 delete rntbackend;
2183 }
2184
2185 std::string ColumnarPhysLiteTest :: makeUniqueName ()
2186 {
2187 static std::atomic<unsigned> index = 0;
2188 return "UniquePhysliteTestTool" + std::to_string(++index);
2189 }
2190
// Always reports true; no condition is evaluated here.
2191 bool ColumnarPhysLiteTest ::
2192 checkMode ()
2193 {
2194 return true;
2195 }
2196
// Fill `knownColumns` with every column reader the test may need.
//
// In order: the event-count column; one reader per TTree branch (tree
// input) or RNTuple field (rntuple input) whose name marks it as an aux
// variable and whose type is in the supported list; the `samplingPattern`
// fallback; the explicitly declared element-link columns; and the extra
// output columns for MET.
//
// testDefinitions: their `metTermNames` are merged, without duplicates and
// in first-seen order, into the term list given to the "OutputMET" column.
2197 void ColumnarPhysLiteTest :: setupKnownColumns (std::span<const TestDefinition> testDefinitions)
2198 {
2199 using namespace TestUtils;
2200
2201 knownColumns.push_back (std::make_shared<ColumnDataEventCount> ());
2202
2203 if (tree)
2204 {
2205 tree->SetMakeClass(1);
2206 {
// Collect the top-level branches and their direct sub-branches by name.
2207 std::unordered_map<std::string, TBranch*> branches;
2208 {
2209 TIter branchIter(tree->GetListOfBranches());
2210 TObject* obj = nullptr;
2211 while ((obj = branchIter()))
2212 {
2213 TBranch* branch = nullptr;
2214 if ((branch = dynamic_cast<TBranch*>(obj)))
2215 {
2216 branches.emplace(branch->GetName(), branch);
2217 TIter subBranchIter(branch->GetListOfBranches());
2218 while ((obj = subBranchIter()))
2219 {
2220 if (auto subBranch = dynamic_cast<TBranch*>(obj))
2221 branches.emplace(subBranch->GetName(), subBranch);
2222 }
2223 }
2224 }
2225 }
2226
// Only branches whose name contains "AuxDyn." or "Aux." get a reader.
2227 for (const auto& [name, branch] : branches)
2228 {
2229 if (name.find("AuxDyn.") != std::string::npos ||
2230 name.find("Aux.") != std::string::npos)
2231 {
2232 TClass* branchClass = nullptr;
2233 EDataType branchType{};
2234 branch->GetExpectedType(branchClass, branchType);
// No class reported: dispatch on the fundamental type; types not listed
// are ignored.
2235 if (branchClass == nullptr)
2236 {
2237 switch (branchType)
2238 {
2239 case kInt_t:
2240 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::int32_t, BranchReader>>(branch->GetName()));
2241 break;
2242 case kUInt_t:
2243 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::uint32_t, BranchReader>>(branch->GetName()));
2244 break;
2245 case kULong_t:
2246 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::uint64_t, BranchReader>>(branch->GetName()));
2247 break;
2248 case kULong64_t:
2249 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::uint64_t, BranchReader>>(branch->GetName()));
2250 break;
2251 case kFloat_t:
2252 knownColumns.push_back(std::make_shared<ColumnDataScalar<float, BranchReader>>(branch->GetName()));
2253 break;
2254 default:
2255 // no-op
2256 break;
2257 }
2258 } else
2259 {
// Class reported: dispatch on the class's type_info; classes not listed
// are ignored.
// NOTE(review): the result of GetTypeInfo() is dereferenced without a
// null check in every comparison below - confirm it cannot be null for
// the branch classes seen here.
2260 if (*branchClass->GetTypeInfo() == typeid(std::vector<float>))
2261 {
2262 knownColumns.push_back (std::make_shared<ColumnDataVector<float,BranchReader>> (branch->GetName()));
2263 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<char>))
2264 {
2265 knownColumns.push_back (std::make_shared<ColumnDataVector<char,BranchReader>> (branch->GetName()));
2266 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::int8_t>))
2267 {
2268 knownColumns.push_back (std::make_shared<ColumnDataVector<std::int8_t,BranchReader>> (branch->GetName()));
2269 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::uint8_t>))
2270 {
2271 knownColumns.push_back (std::make_shared<ColumnDataVector<std::uint8_t,BranchReader>> (branch->GetName()));
2272 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::int16_t>))
2273 {
2274 knownColumns.push_back (std::make_shared<ColumnDataVector<std::int16_t,BranchReader>> (branch->GetName()));
2275 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::uint16_t>))
2276 {
2277 knownColumns.push_back (std::make_shared<ColumnDataVector<std::uint16_t,BranchReader>> (branch->GetName()));
2278 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::int32_t>))
2279 {
2280 knownColumns.push_back (std::make_shared<ColumnDataVector<std::int32_t,BranchReader>> (branch->GetName()));
2281 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::uint32_t>))
2282 {
2283 knownColumns.push_back (std::make_shared<ColumnDataVector<std::uint32_t,BranchReader>> (branch->GetName()));
2284 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::int64_t>))
2285 {
2286 knownColumns.push_back (std::make_shared<ColumnDataVector<std::int64_t,BranchReader>> (branch->GetName()));
2287 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::uint64_t>))
2288 {
2289 knownColumns.push_back (std::make_shared<ColumnDataVector<std::uint64_t,BranchReader>> (branch->GetName()));
2290 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::vector<float>>))
2291 {
2292 knownColumns.push_back (std::make_shared<ColumnDataVectorVector<float,BranchReader>> (branch->GetName()));
2293 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::vector<std::int32_t>>))
2294 {
2295 knownColumns.push_back (std::make_shared<ColumnDataVectorVector<std::int32_t,BranchReader>> (branch->GetName()));
2296 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::vector<std::uint64_t>>))
2297 {
2298 knownColumns.push_back (std::make_shared<ColumnDataVectorVector<std::uint64_t,BranchReader>> (branch->GetName()));
2299 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::vector<std::vector<std::size_t>>>))
2300 {
2301 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorVector<std::size_t,BranchReader>> (branch->GetName()));
2302 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::vector<std::vector<unsigned char>>>))
2303 {
2304 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorVector<unsigned char,BranchReader>> (branch->GetName()));
2305 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::string>))
2306 {
2307 knownColumns.push_back (std::make_shared<ColumnDataMetNames<BranchReader>> (branch->GetName()));
2308 }
2309 }
2310 }
2311 }
2312 }
2313 // This is a fallback for the case that we don't have an explicit
2314 // `samplingPattern` branch in our input file (i.e. an older file),
2315 // to allow us to still test tools needing it. This is likely not
2316 // something that actual users can do (they need the new files), but
2317 // for testing it seems like a reasonable workaround.
2318 knownColumns.push_back(std::make_shared<ColumnDataSamplingPattern<BranchReader>>("egammaClusters"));
2319
2320 // For branches that are element links they need to be explicitly
2321 // declared to have the correct xAOD type, correct split setting,
2322 // and correct linked containers.
2323
2324 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::CaloClusterContainer,BranchReader>>("AnalysisElectronsAuxDyn.caloClusterLinks"));
2325 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::TrackParticleContainer, BranchReader>>("AnalysisElectronsAuxDyn.trackParticleLinks"));
2326 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::CaloClusterContainer,BranchReader>>("AnalysisPhotonsAuxDyn.caloClusterLinks"));
2327 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::VertexContainer, BranchReader>>("AnalysisPhotonsAuxDyn.vertexLinks"));
2328 knownColumns.push_back(std::make_shared<ColumnDataVectorSplitLink<xAOD::TrackParticleContainer>>("AnalysisMuonsAuxDyn.inDetTrackParticleLink"));
2329 knownColumns.push_back(std::make_shared<ColumnDataVectorSplitLink<xAOD::TrackParticleContainer>>("AnalysisMuonsAuxDyn.combinedTrackParticleLink"));
2330 knownColumns.push_back(std::make_shared<ColumnDataVectorSplitLink<xAOD::TrackParticleContainer>>("AnalysisMuonsAuxDyn.extrapolatedMuonSpectrometerTrackParticleLink"));
2331 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::TrackParticleContainer, BranchReader>>("GSFConversionVerticesAuxDyn.trackParticleLinks"));
2332 knownColumns.push_back(std::make_shared<ColumnDataVectorSplitLink<xAOD::TrackParticleContainer>>("GSFTrackParticlesAuxDyn.originalTrackParticle"));
2333 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorVariantLink<xAOD::IParticleContainer, BranchReader>>("AnalysisJetsAuxDyn.GhostTrack"));
2334 knownColumns.push_back(std::make_shared<ColumnDataVectorLink<xAOD::JetContainer, BranchReader>>("METAssoc_AnalysisMETAux.jetLink"));
2335 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorVariantLink<xAOD::IParticleContainer, BranchReader>>("METAssoc_AnalysisMETAux.objectLinks"));
2336
2337 }else if (rntbackend)
2338 {
// Collect every field of the RNTuple, at any nesting depth, keyed by its
// qualified name.
2339 std::unordered_map<std::string, ROOT::DescriptorId_t> fields;
2340 {
2341 const auto& desc = rntreader->GetDescriptor();
2342
2343 for (const auto& field : desc.GetTopLevelFields())
2344 {
// NOTE(review): `fieldName` is not used after this line in the visible code.
2345 auto fieldName = field.GetFieldName();
2346 fields.emplace(desc.GetQualifiedFieldName(field.GetId()), field.GetId());
2347
// Walk the sub-fields with an explicit stack of parent ids.
2348 std::vector<ROOT::DescriptorId_t> subFieldIds{field.GetId()};
2349 while (!subFieldIds.empty())
2350 {
2351 const auto parentId = subFieldIds.back();
// NOTE(review): `parentname` is not used after this line in the visible code.
2352 auto parentname=desc.GetQualifiedFieldName(parentId);
2353 subFieldIds.pop_back();
2354
2355 for (const auto& subField : desc.GetFieldIterable(parentId))
2356 {
// NOTE(review): `subFieldName` is not used; the same name is recomputed
// for the emplace below.
2357 auto subFieldName = desc.GetQualifiedFieldName(subField.GetId());
2358
2359 fields.emplace(desc.GetQualifiedFieldName(subField.GetId()), subField.GetId());
2360
2361 subFieldIds.push_back(subField.GetId());
2362 }
2363 }
2364 }
2365 }
2366
2367 const auto& desc = rntreader->GetDescriptor();
2368 for (const auto& [name, fieldId] : fields)
2369 {
// NOTE(review): `fieldName` is not used after this line in the visible code.
2370 auto fieldName = desc.GetQualifiedFieldName(fieldId);
2371
// Only fields whose name contains "AuxDyn:" or "Aux:" get a reader.
2372 if (name.find("AuxDyn:") != std::string::npos ||
2373 name.find("Aux:") != std::string::npos)
2374 {
2375
// Dispatch on the type name string; types not listed are ignored.
2376 const auto& fieldDesc = desc.GetFieldDescriptor(fieldId);
2377 const std::string typeName = desc.GetTypeNameForComparison(fieldDesc);
2378 if (typeName == "std::int32_t" || typeName == "int")
2379 {
2380 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::int32_t, RNTFieldReader>>(name));
2381 } else if (typeName == "std::uint32_t" || typeName == "unsigned int")
2382 {
2383 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::uint32_t, RNTFieldReader>>(name));
2384 } else if (typeName == "std::uint64_t" || typeName == "unsigned long" || typeName == "unsigned long long")
2385 {
2386 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::uint64_t, RNTFieldReader>>(name));
2387 } else if (typeName == "float")
2388 {
2389 knownColumns.push_back(std::make_shared<ColumnDataScalar<float, RNTFieldReader>>(name));
2390 } else if (typeName == "std::vector<float>")
2391 {
2392 knownColumns.push_back(std::make_shared<ColumnDataVector<float, RNTFieldReader>>(name));
2393 } else if (typeName == "std::vector<char>")
2394 {
2395 knownColumns.push_back(std::make_shared<ColumnDataVector<char, RNTFieldReader>>(name));
2396 } else if (typeName == "std::vector<std::int8_t>")
2397 {
2398 knownColumns.push_back(std::make_shared<ColumnDataVector<std::int8_t, RNTFieldReader>>(name));
2399 } else if (typeName == "std::vector<std::uint8_t>")
2400 {
2401 knownColumns.push_back(std::make_shared<ColumnDataVector<std::uint8_t, RNTFieldReader>>(name));
2402 } else if (typeName == "std::vector<std::int16_t>")
2403 {
2404 knownColumns.push_back(std::make_shared<ColumnDataVector<std::int16_t, RNTFieldReader>>(name));
2405 } else if (typeName == "std::vector<std::uint16_t>")
2406 {
2407 knownColumns.push_back(std::make_shared<ColumnDataVector<std::uint16_t, RNTFieldReader>>(name));
2408 } else if (typeName == "std::vector<std::int32_t>")
2409 {
2410 knownColumns.push_back(std::make_shared<ColumnDataVector<std::int32_t, RNTFieldReader>>(name));
2411 } else if (typeName == "std::vector<std::uint32_t>")
2412 {
2413 knownColumns.push_back(std::make_shared<ColumnDataVector<std::uint32_t, RNTFieldReader>>(name));
2414 } else if (typeName == "std::vector<std::int64_t>")
2415 {
2416 knownColumns.push_back(std::make_shared<ColumnDataVector<std::int64_t, RNTFieldReader>>(name));
2417 } else if (typeName == "std::vector<std::uint64_t>")
2418 {
2419 knownColumns.push_back(std::make_shared<ColumnDataVector<std::uint64_t, RNTFieldReader>>(name));
2420 } else if (typeName == "std::vector<std::vector<float>>")
2421 {
2422 knownColumns.push_back(std::make_shared<ColumnDataVectorVector<float, RNTFieldReader>>(name));
2423 } else if (typeName == "std::vector<std::vector<std::int32_t>>")
2424 {
2425 knownColumns.push_back(std::make_shared<ColumnDataVectorVector<std::int32_t, RNTFieldReader>>(name));
// NOTE(review): the next five branches have no body in this listing, and
// the embedded source numbering skips one line inside each of them (2428,
// 2431, 2434, 2437, 2440).  Presumably each registers a column like the
// branches above - confirm against the original file.
2426 } else if (typeName == "std::vector<std::vector<std::uint64_t>>")
2427 {
2429 } else if (typeName =="std::vector<std::vector<std::vector<std::size_t>>>")
2430 {
2432 }else if (typeName =="std::vector<std::vector<std::vector<std::uint64_t>>>")
2433 {
2435 }else if (typeName =="std::vector<std::vector<std::vector<std::uint8_t>>>")
2436 {
2438 } else if (typeName =="std::vector<std::vector<std::vector<unsigned char>>>")
2439 {
2441 } else if (typeName == "std::vector<std::string>")
2442 {
2443 knownColumns.push_back(std::make_shared<ColumnDataMetNames<RNTFieldReader>>(name));
2444 }
2445 }
2446 }
// Same fallback and explicit link declarations as in the TTree branch,
// using the RNTuple field names (":" separator) and RNTFieldReader.
2447 knownColumns.push_back(std::make_shared<ColumnDataSamplingPattern<RNTFieldReader>>("egammaClusters"));
2448
2449 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::CaloClusterContainer,RNTFieldReader>>("AnalysisElectronsAuxDyn:caloClusterLinks"));
2450 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::TrackParticleContainer, RNTFieldReader>>("AnalysisElectronsAuxDyn:trackParticleLinks"));
2451 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::CaloClusterContainer,RNTFieldReader>>("AnalysisPhotonsAuxDyn:caloClusterLinks"));
2452 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::VertexContainer, RNTFieldReader>>("AnalysisPhotonsAuxDyn:vertexLinks"));
2453 knownColumns.push_back(std::make_shared<ColumnDataVectorRLink<xAOD::TrackParticleContainer,RNTFieldReader>>("AnalysisMuonsAuxDyn:inDetTrackParticleLink"));
2454 knownColumns.push_back(std::make_shared<ColumnDataVectorRLink<xAOD::TrackParticleContainer,RNTFieldReader>>("AnalysisMuonsAuxDyn:combinedTrackParticleLink"));
2455 knownColumns.push_back(std::make_shared<ColumnDataVectorRLink< xAOD::TrackParticleContainer, RNTFieldReader>>("AnalysisMuonsAuxDyn:extrapolatedMuonSpectrometerTrackParticleLink"));
2456 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::TrackParticleContainer, RNTFieldReader>>("GSFConversionVerticesAuxDyn:trackParticleLinks"));
2457 knownColumns.push_back(std::make_shared<ColumnDataVectorRLink<xAOD::TrackParticleContainer,RNTFieldReader>>("GSFTrackParticlesAuxDyn:originalTrackParticle"));
2458 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorVariantLink<xAOD::IParticleContainer, RNTFieldReader>>("AnalysisJetsAuxDyn:GhostTrack"));
2459 knownColumns.push_back(std::make_shared<ColumnDataVectorLink<xAOD::JetContainer, RNTFieldReader>>("METAssoc_AnalysisMETAux:.jetLink"));
2460 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorVariantLink<xAOD::IParticleContainer, RNTFieldReader>>("METAssoc_AnalysisMETAux:.objectLinks"));
2461
2462 }
2463
2464
2465 // For METMaker we need to preplace all of the MET terms that we
2466 // expect to be used, that's what this line does.
2467 std::vector<std::string> allMetTermNames;
2468 for (const auto& td : testDefinitions)
2469 {
2470 for (const auto& name : td.metTermNames)
2471 {
2472 if (std::find (allMetTermNames.begin(), allMetTermNames.end(), name) == allMetTermNames.end())
2473 allMetTermNames.push_back (name);
2474 }
2475 }
2476
2477
2478 if (tree)
2479 {
2480 if (!allMetTermNames.empty())
2481 knownColumns.push_back(std::make_shared<ColumnDataOutputMet<BranchReader>>("OutputMET",allMetTermNames));
2482
2483 // For METMaker we need various extra columns to run. This may need
2484 // some work to avoid, but would likely be worth it.
2485 knownColumns.push_back(std::make_shared<ColumnDataOutVector<std::uint16_t, BranchReader>>("AnalysisMuons.objectType", xAOD::Type::Muon));
2486 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, BranchReader>>("AnalysisMuons.m", ParticleConstants::muonMassInMeV));
2487 knownColumns.push_back(std::make_shared<ColumnDataOutVector<std::uint16_t, BranchReader>>("AnalysisJets.objectType", xAOD::Type::Jet));
2488
2489 // These are columns that represent variables that are normally held
2490 // by METAssociationHelper, or alternatively are decorated on the
2491 // MET terms (even though they are per object).
2492 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, BranchReader>>("AnalysisMuons.MetObjectWeight", 0));
2493 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, BranchReader>>("AnalysisJets.MetObjectWeight", 0));
2494 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, BranchReader>>("AnalysisJets.MetObjectWeightSoft", 0));
2495 knownColumns.push_back(std::make_shared<ColumnDataOutVector<MissingETBase::Types::bitmask_t,BranchReader>>("METAssoc_AnalysisMET.useObjectFlags", 0));
2496 } else if (rntbackend)
2497 {
// NOTE(review): this instantiates ColumnDataOutputMet with BranchReader
// although every other column in this branch uses RNTFieldReader; the
// visible ColumnDataOutputMet::connect ignores its backend argument, so
// this is presumably harmless - confirm.
2498 if (!allMetTermNames.empty())
2499 knownColumns.push_back(std::make_shared<ColumnDataOutputMet<BranchReader>>("OutputMET",allMetTermNames));
2500
2501 // For METMaker we need various extra columns to run. This may need
2502 // some work to avoid, but would likely be worth it.
2503 knownColumns.push_back(std::make_shared<ColumnDataOutVector<std::uint16_t, RNTFieldReader>>("AnalysisMuons.objectType", xAOD::Type::Muon));
2504 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, RNTFieldReader>>("AnalysisMuons.m", ParticleConstants::muonMassInMeV));
2505 knownColumns.push_back(std::make_shared<ColumnDataOutVector<std::uint16_t, RNTFieldReader>>("AnalysisJets.objectType", xAOD::Type::Jet));
2506
2507 // These are columns that represent variables that are normally held
2508 // by METAssociationHelper, or alternatively are decorated on the
2509 // MET terms (even though they are per object).
2510 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, RNTFieldReader>>("AnalysisMuons.MetObjectWeight", 0));
2511 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, RNTFieldReader>>("AnalysisJets.MetObjectWeight", 0));
2512 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, RNTFieldReader>>("AnalysisJets.MetObjectWeightSoft", 0));
2513 knownColumns.push_back(std::make_shared<ColumnDataOutVector<MissingETBase::Types::bitmask_t,RNTFieldReader>>("METAssoc_AnalysisMET.useObjectFlags", 0));
2514 }
2515 } // end of setupKnownColumns (this brace closes the function, not the namespace)
2516
2517 void ColumnarPhysLiteTest :: setupColumns (const ColumnVectorHeader& columnHeader)
2518 {
2519 using namespace asg::msgUserCode;
2520
2521 // Get all column info directly from the header (all tools have already
2522 // registered their columns via ToolColumnVectorMap)
2523 auto requestedColumns = columnHeader.getAllColumnInfo();
2524
2525 // Print requested columns
2526 for (auto& [name, info] : requestedColumns)
2527 std::cout << "requested columns: " << name << std::endl;
2528
2529 for (auto& column : knownColumns)
2530 {
2531 if (tree)
2532 {
2533 if (column->connect (tree, offsetColumns, requestedColumns))
2534 usedColumns.push_back (column);
2535 } else if (rntbackend)
2536 {
2537 if (column->connect(rntbackend, offsetColumns, requestedColumns))
2538 usedColumns.push_back(column);
2539 }
2540 }
2541
2542 std::set<std::string> unclaimedColumns;
2543 for (auto& column : requestedColumns)
2544 {
2545 if (!column.second.isOptional)
2546 unclaimedColumns.insert (column.first);
2547 else
2548 std::cout << "optional column not claimed: " << column.first << std::endl;
2549 }
2550 std::erase_if (unclaimedColumns, [&] (auto& columnName)
2551 {
2552 const auto& info = requestedColumns.at (columnName);
2553 if (info.accessMode != ColumnAccessMode::output || !info.fixedDimensions.empty())
2554 return false;
2555 auto offsetIter = std::find_if (usedColumns.begin(), usedColumns.end(), [&] (const std::shared_ptr<TestUtils::IColumnData>& column)
2556 {
2557 for (auto& output : column->outputColumns)
2558 {
2559 if (output.name == info.offsetName)
2560 return true;
2561 }
2562 return false;
2563 });
2564 if (offsetIter == usedColumns.end())
2565 return false;
2566 std::shared_ptr<TestUtils::IColumnData> myColumn;
2567 if (tree)
2568 {
2569 if (*info.type == typeid(float))
2570 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<float, BranchReader>>(info.name, 0);
2571 else if (*info.type == typeid(char))
2572 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<char, BranchReader>>(info.name, 0);
2573 else if (*info.type == typeid(std::uint16_t))
2574 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<std::uint16_t, BranchReader>>(info.name, 0);
2575 else if (*info.type == typeid(std::uint64_t))
2576 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<std::uint64_t, BranchReader>>(info.name, 0);
2577 else
2578 {
2579 ANA_MSG_WARNING("unhandled column type: " << info.name << " "<< info.type->name());
2580 return false;
2581 }
2582 } else if (rntbackend)
2583 {
2584 if (*info.type == typeid(float))
2585 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<float, RNTFieldReader>>(info.name,0);
2586 else if (*info.type == typeid(char))
2587 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<char, RNTFieldReader>>(info.name, 0);
2588 else if (*info.type == typeid(std::uint16_t))
2589 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<std::uint16_t, RNTFieldReader>>(info.name, 0);
2590 else if (*info.type == typeid(std::uint64_t))
2591 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<std::uint64_t, RNTFieldReader>>(info.name, 0);
2592 else
2593 {
2594 ANA_MSG_WARNING("unhandled column type: " << info.name << " " << info.type->name());
2595 return false;
2596 }
2597 }
2598 knownColumns.push_back(myColumn);
2599 if (tree) {
2600 if (!myColumn->connect(tree, offsetColumns, requestedColumns))
2601 {
2602 ANA_MSG_WARNING("failed to connect dynamic output column: " << info.name);
2603 return false;
2604 }
2605 } else if (rntbackend)
2606 {
2607 if (!myColumn->connect(rntbackend, offsetColumns, requestedColumns))
2608 {
2609 ANA_MSG_WARNING("failed to connect dynamic output column: " << info.name);
2610 return false;
2611 }
2612 }
2613 usedColumns.push_back(myColumn);
2614 return true;
2615 });
2616 if (!unclaimedColumns.empty())
2617 {
2618 std::string message = "columns not claimed:";
2619 for (auto& column : unclaimedColumns)
2620 message += " " + column;
2621 throw std::runtime_error(message);
2622 }
2623 }
2624
2625 void ColumnarPhysLiteTest :: doCall (const TestDefinition& testDefinition)
2626 {
2627 doCallMulti ({testDefinition});
2628 }
2629
2630
/// Run one or more tools over the input and print benchmark tables.
///
/// First applies the systematic variation requested by each test
/// definition (if any).  What happens next depends on the compile-time
/// constant `columnarAccessMode`:
///  - 2: builds a shared ColumnVectorHeader, connects all columns, then
///    loops over the input in batches of `userConfiguration.batchSize`
///    entries until `userConfiguration.targetTime` has elapsed, calling
///    every tool on each batch.  Afterwards prints a per-column I/O table
///    and a per-tool timing table to stdout.
///  - 0: delegates to TestUtils::runXaodTest().
///  - 100: delegates to TestUtils::runXaodArrayTest() using only the first
///    test definition.
///
/// @param testDefinitions the tools (and their configuration) to run
/// @throws std::runtime_error if a tool with a non-empty `sysName` does not
///         implement CP::ISystematicsTool, if applying the systematic
///         variation fails, if the batch size is zero, if the input has no
///         entries (mode 2), or if no test definition is given (mode 100).
void ColumnarPhysLiteTest ::doCallMulti(
    const std::vector<TestDefinition>& testDefinitions) {
  using namespace asg::msgUserCode;
  auto userConfiguration = TestUtils::UserConfiguration::fromEnvironment();

  // apply systematics for all test definitions
  for (const auto& td : testDefinitions) {
    if (!td.sysName.empty()) {
      auto* sysTool = dynamic_cast<CP::ISystematicsTool*>(td.tool);
      if (!sysTool)
        throw std::runtime_error("tool does not support systematics");
      std::cout << "applying systematic variation: " << td.sysName << std::endl;
      if (sysTool->applySystematicVariation(CP::SystematicSet(td.sysName))
              .isFailure())
        throw std::runtime_error("failed to apply systematic variation: " +
                                 td.sysName);
    }
  }

  if constexpr (columnarAccessMode == 2) {
    // the batch size is used as a modulus below, so zero would be
    // undefined behavior
    if (userConfiguration.batchSize == 0)
      throw std::runtime_error("batch size must not be zero");

    // Create shared column header for all tools
    ColumnVectorHeader columnHeader;

    // Build vector of ToolData from all testDefinitions
    std::vector<TestUtils::ToolData> toolDataVec;
    for (const auto& td : testDefinitions)
      toolDataVec.emplace_back(userConfiguration, td, columnHeader);

    setupKnownColumns(testDefinitions);
    // Set up columns using the shared header (all tools have already
    // registered their columns via ToolColumnVectorMap, so we get all columns
    // from the header)
    setupColumns(columnHeader);

    // connect column indices from header to each column for direct setting
    for (auto& column : usedColumns)
      column->connectColumnIndices(columnHeader);

    Benchmark benchmarkEmpty("empty");
    Benchmark benchmarkCheck("", userConfiguration.batchSize);

    // use a 64 bit count: entry counts are 64 bit on both backends and
    // must not be narrowed to int
    Long64_t numberOfEvents = 0;
    if (tree) {
      numberOfEvents = tree->GetEntries();
    } else if (rntbackend) {
      numberOfEvents = static_cast<Long64_t>(rntreader->GetNEntries());
    }
    // the entry index is wrapped with `% numberOfEvents` below, so an
    // empty (or missing) input has to be rejected up front
    if (numberOfEvents <= 0)
      throw std::runtime_error("input contains no entries to process");

    Long64_t entry = 0;
    const auto startTime = std::chrono::high_resolution_clock::now();
    bool endLoop = false;
    // The loop wraps around the input (entry % numberOfEvents) and only
    // stops at a batch boundary once targetTime has elapsed.
    for (; !endLoop; ++entry) {
      // just sample how much overhead there is for starting and
      // stopping the timer
      benchmarkEmpty.startTimer();
      benchmarkEmpty.stopTimer();
      ColumnVectorData columnData(&columnHeader);
      for (auto& column : usedColumns)
        column->getEntry(entry % numberOfEvents);
      if ((entry + 1) % userConfiguration.batchSize == 0) {
        // collectColumnData() is only invoked during the first pass over
        // the input
        if (entry < numberOfEvents) {
          for (auto& column : usedColumns)
            column->collectColumnData();
        }
        for (auto& column : usedColumns)
          column->setData(columnData);

        // Check data once (shared column data)
        benchmarkCheck.startTimer();
        columnData.checkData();
        benchmarkCheck.stopTimer();
        // Call each tool
        for (auto& toolData : toolDataVec) {
          toolData.call(columnData);
        }
        for (auto& column : usedColumns)
          column->clearColumns();
        if ((std::chrono::high_resolution_clock::now() - startTime) >
            userConfiguration.targetTime)
          endLoop = true;
      } else if (entry + 1 == numberOfEvents) {
        // last entry of the first pass that does not complete a batch
        for (auto& column : usedColumns)
          column->collectColumnData();
      }
    }
    std::cout << "Entries in file: " << numberOfEvents << std::endl;
    std::cout << "Total entries read: " << entry << std::endl;
    const float emptyTime = benchmarkEmpty.getEntryTime(0).value();
    std::cout << "Empty benchmark time: " << emptyTime << "ns (tick=" << Benchmark::getTickDuration() << "ns)" << std::endl;
    benchmarkEmpty.setSilence();
    const auto checkTime = benchmarkCheck.getEntryTime(emptyTime);
    if (checkTime)
      std::cout << "Check data time: " << checkTime.value() << "ns" << std::endl;
    benchmarkCheck.setSilence();

    // placeholder for a table cell without a value: `width` blanks plus
    // the column separator, so it lines up with the formatted cells
    const auto blankCell = [] (std::size_t width) {
      return std::string (width, ' ') + " |";
    };

    // per-column I/O table
    {
      std::vector<TestUtils::BranchPerfData> branchPerfData;
      TestUtils::BranchPerfData summary;
      summary.name = "total";
      summary.timeRead = 0;
      summary.timeUnpack = 0;
      summary.timeShallowCopy = 0;
      summary.entrySize = 0;
      summary.uncompressedSize = 0;
      summary.numBaskets = 0;
      summary.entries = std::nullopt;
      summary.nullEntries = std::nullopt;
      for (auto& column : usedColumns)
      {
        branchPerfData.push_back (column->getPerfData (emptyTime));
        const auto& perf = branchPerfData.back();
        summary.timeRead.value() += perf.timeRead.value_or(0);
        summary.timeUnpack.value() += perf.timeUnpack.value_or(0);
        summary.entrySize.value() += perf.entrySize.value_or(0);
        summary.uncompressedSize.value() += perf.uncompressedSize.value_or(0);
        summary.numBaskets.value() += perf.numBaskets.value_or(0);
        summary.timeShallowCopy.value() += perf.timeShallowCopy.value_or(0);
      }
      std::sort (branchPerfData.begin(), branchPerfData.end(), [] (const auto& a, const auto& b) {return a.name < b.name;});
      // the summary row goes last, after the sorted per-column rows
      branchPerfData.insert (branchPerfData.end(), summary);

      // the name column has to fit the header label as well as all names
      const std::string label = userConfiguration.isrntuple ? "field name" : "branch name";
      std::size_t nameWidth = label.size();
      for (const auto& data : branchPerfData)
        nameWidth = std::max (nameWidth, data.name.size());

      std::string header = std::format ("{:{}} | read(ns) | unpack(ns) | size(B) | rate(MB/s) | compression | baskets | entries | null", label, nameWidth);
      std::cout << "\n" << header << std::endl;
      std::cout << std::string (header.size(), '-') << std::endl;
      for (auto& data : branchPerfData)
      {
        // separator line in front of the summary row
        if (data.name == "total")
          std::cout << std::string (header.size(), '-') << std::endl;
        std::cout << std::format ("{:{}} |", data.name, nameWidth);
        if (data.timeRead)
          std::cout << std::format ("{:>9.0f} |", data.timeRead.value());
        else
          std::cout << blankCell (9);
        if (data.timeUnpack)
          std::cout << std::format ("{:>11.1f} |", data.timeUnpack.value());
        else
          std::cout << blankCell (11);
        if (data.entrySize)
          std::cout << std::format ("{:>8.1f} |", data.entrySize.value());
        else
          std::cout << blankCell (8);
        if (data.timeRead && data.entrySize)
          std::cout << std::format ("{:>11.1f} |", (data.entrySize.value() / (data.timeRead.value() * 1e-3 * 1.024 * 1.024)));
        else
          std::cout << blankCell (11);
        if (data.entrySize && data.uncompressedSize)
          std::cout << std::format ("{:>12.2f} |", static_cast<float>(data.uncompressedSize.value()) / data.entrySize.value());
        else
          std::cout << blankCell (12);
        if (data.numBaskets)
          std::cout << std::format ("{:>8} |", data.numBaskets.value());
        else
          std::cout << blankCell (8);
        if (data.entries)
          std::cout << std::format ("{:>8.2f} |", static_cast<float>(data.entries.value())/numberOfEvents);
        else
          std::cout << blankCell (8);
        if (data.nullEntries && data.entries)
          std::cout << std::format ("{:>4.0f}%", static_cast<float>(data.nullEntries.value()) / data.entries.value() * 100.0f);
        std::cout << std::endl;
      }
    }

    // per-tool timing table
    {
      std::vector<TestUtils::ToolPerfData> toolPerfData;
      for (auto& toolData : toolDataVec)
      {
        toolPerfData.emplace_back ();
        toolPerfData.back().name = toolData.name;
        toolPerfData.back().timeCall = toolData.benchmarkCall.getEntryTime (emptyTime);
        if (userConfiguration.runToolTwice)
          toolPerfData.back().timeCall2 = toolData.benchmarkCall2.getEntryTime (emptyTime);
      }

      // start from the label width, so that this is well defined even
      // if there are no tools at all
      const std::string label = "tool name";
      std::size_t nameWidth = label.size();
      for (const auto& data : toolPerfData)
        nameWidth = std::max (nameWidth, data.name.size());

      std::string header = std::format ("{:{}} | call(ns) | call2(ns)", label, nameWidth);
      std::cout << "\n" << header << std::endl;
      std::cout << std::string (header.size(), '-') << std::endl;
      for (auto& data : toolPerfData)
      {
        std::cout << std::format ("{:{}} |", data.name, nameWidth);
        if (data.timeCall)
          std::cout << std::format ("{:>9.0f} |", data.timeCall.value());
        else
          std::cout << blankCell (9);
        if (data.timeCall2)
          std::cout << std::format ("{:>10.0f}", data.timeCall2.value());
        else
          std::cout << std::string (10, ' ');
        std::cout << std::endl;
      }
      // Add totals line for multiple tools
      if (toolPerfData.size() > 1)
      {
        // a total stays unset if none of the tools reported that time
        std::optional<float> totalCall, totalCall2;
        for (const auto& data : toolPerfData)
        {
          if (data.timeCall)
            totalCall = totalCall.value_or (0) + data.timeCall.value();
          if (data.timeCall2)
            totalCall2 = totalCall2.value_or (0) + data.timeCall2.value();
        }
        std::cout << std::string (header.size(), '-') << std::endl;
        std::cout << std::format ("{:{}} |", "total", nameWidth);
        if (totalCall)
          std::cout << std::format ("{:>9.0f} |", totalCall.value());
        else
          std::cout << blankCell (9);
        if (totalCall2)
          std::cout << std::format ("{:>10.0f}", totalCall2.value());
        else
          std::cout << std::string (10, ' ');
        std::cout << std::endl;
      }
    }
  } else if constexpr (columnarAccessMode == 0)
  {
    TestUtils::runXaodTest (userConfiguration, testDefinitions, file.get());
  } else if constexpr (columnarAccessMode == 100)
  {
    // this mode only runs the first test definition, so there has to be one
    if (testDefinitions.empty())
      throw std::runtime_error("no test definitions provided");
    const auto& testDefinition = testDefinitions.front();
    TestUtils::runXaodArrayTest (userConfiguration, testDefinition, file.get());
  }
}
2850}
#define ANA_MSG_WARNING(xmsg)
Macro printing warning messages.
void checkTime()
int numberOfEvents()
char data[hepevt_bytes_allocation_ATLAS]
Definition HepEvt.cxx:11
static Double_t a
A number of constexpr particle constants to avoid hardcoding them directly in various places.
if(pathvar)
size_t size() const
Number of registered mappings.
Interface for all CP tools supporting systematic variations.
Class to wrap a set of SystematicVariations.
bool isDefault() const
Test to see if this link is in the default state.
a class that holds the columnar data for a single call
void checkData() const
do a basic check of the data vector
void setColumn(std::size_t columnIndex, std::size_t size, CT *dataPtr)
set the data for the given column
the header information for the entire columnar data vector
static constexpr std::size_t nullIndex
the index used for an invalid index (always has to be 0)
std::unordered_map< std::string, ColumnInfo > getAllColumnInfo() const
get all columns as a map of ColumnInfo for use with IColumnData::connect
the base class for all columnar components
this is a simple benchmarking helper class wrapping timers from std::chrono
Definition Benchmark.h:51
static float getTickDuration()
Definition Benchmark.h:86
std::optional< float > getEntryTime(float emptyTime) const
Definition Benchmark.h:74
BranchReaderArray(const std::string &val_branchName)
std::span< const T > getEntry(Long64_t entry, std::size_t size)
BranchReaderArray(const BranchReaderArray &)=delete
std::optional< float > uncompressedSize() const
BranchReaderArray & operator=(const BranchReaderArray &)=delete
const std::string & branchName() const
std::optional< float > entrySize() const
BranchReader(const BranchReader &)=delete
BranchReader(const std::string &val_branchName)
std::optional< unsigned > numBaskets()
BranchReader & operator=(const BranchReader &)=delete
std::optional< float > uncompressedSize() const
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &offsetColumns, std::unordered_map< std::string, ColumnInfo > &requestedColumns)=0
virtual void collectColumnData()=0
std::vector< OutputColumnInfo > outputColumns
virtual void setData(ColumnVectorData &columnData)=0
virtual ~IColumnData() noexcept=default
virtual void getEntry(Long64_t entry)=0
void connectColumnIndices(const ColumnVectorHeader &header)
lookup and store column indices from the header for all enabled output columns
virtual BranchPerfData getPerfData(float emptyTime)=0
std::vector< typename CM::LinkIndexType > m_columnData
const CM::LinkIndexType * data() const noexcept
void addLink(const ElementLink< T > &element, unsigned eventIndex)
std::vector< const std::vector< ColumnarOffsetType > * > m_targetOffsetColumns
std::vector< std::vector< typename CM::LinkKeyType > > m_keysColumns
void addSplitLink(std::size_t linkIndex, SG::sgkey_t linkKey, unsigned eventIndex)
std::unordered_map< SG::sgkey_t, std::unordered_set< std::size_t > > m_unknownKeysAllowedTargets
const std::vector< typename CM::LinkKeyType > & keysColumn(std::size_t index) const
std::vector< std::string > connect(const ColumnInfo &columnInfo, const std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &offsetColumns, const std::unordered_map< std::string, ColumnInfo > &requestedColumns)
void addTarget(const std::string &name, const std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &offsetColumns)
RNTFieldReader(const RNTFieldReader &)=delete
ROOT::Experimental::RNTupleInspector * m_inspector
void connectRNTuple(ROOT::RNTupleReader *reader, ROOT::Experimental::RNTupleInspector *inspector)
std::optional< float > entrySize() const
std::optional< float > uncompressedSize() const
RNTFieldReader & operator=(const RNTFieldReader &)=delete
std::unique_ptr< ROOT::RNTupleView< T > > m_view
RNTFieldReader(const std::string &val_fieldName)
std::string label(const std::string &format, int i)
Definition label.h:19
IAppMgrUI * Init(const char *options="POOLRootAccess/basic.opts")
Bootstraps (creates and configures) the Gaudi Application with the provided options file.
constexpr double muonMassInMeV
the mass of the muon (in MeV)
uint32_t sgkey_t
Type used for hashed StoreGate key+CLID pairs.
Definition sgkey_t.h:32
std::variant< TTree *, RNTupleBackend * > Backend
void runXaodArrayTest(const UserConfiguration &userConfiguration, const TestDefinition &testDefinition, TFile *file)
void runXaodTest(const UserConfiguration &userConfiguration, std::span< const TestDefinition > testDefinitions, TFile *file)
const std::unordered_map< std::string, SG::sgkey_t > knownKeys
constexpr unsigned columnarAccessMode
@ output
an output column
Definition ColumnInfo.h:24
void renameContainers(IColumnarTool &tool, const std::vector< std::pair< std::string, std::string > > &renames)
rename containers in the columnar tool
const std::string eventRangeColumnName
the default name for the column containing the event range
std::size_t ColumnarOffsetType
the type used for the size and offsets in the columnar data
constexpr ColumnarOffsetType invalidObjectIndex
the value for an invalid element index
Definition index.py:1
STL namespace.
void sort(typename DataModel_detail::iterator< DVL > beg, typename DataModel_detail::iterator< DVL > end)
Specialization of sort for DataVector/List.
std::size_t erase_if(T_container &container, T_Func pred)
@ Jet
The object is a jet.
Definition ObjectType.h:40
@ Muon
The object is a muon.
Definition ObjectType.h:48
StatusCode Init(const char *appname)
Function initialising ROOT/PyROOT for using the ATLAS EDM.
Definition Init.cxx:31
a struct that contains meta-information about each column that's needed to interface the column with ...
Definition ColumnInfo.h:35
std::string soleLinkTargetName
for simple link columns: the name of the target container
Definition ColumnInfo.h:131
std::string name
the name of the column
Definition ColumnInfo.h:42
std::size_t LinkIndexType
the type used for columns that represent element links
static LinkIndexType mergeLinkKeyIndex(LinkIndexType key, LinkIndexType index)
merge a key and index value into a link value
columnar::TestUtils::RNTupleBackend * rntbackend
std::vector< std::shared_ptr< TestUtils::IColumnData > > knownColumns
std::vector< std::shared_ptr< TestUtils::IColumnData > > usedColumns
std::unique_ptr< ROOT::Experimental::RNTupleInspector > inspector
std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > offsetColumns
void setupKnownColumns(std::span< const TestUtils::TestDefinition > testDefinitions)
void setupColumns(const ColumnVectorHeader &columnHeader)
std::unique_ptr< ROOT::RNTupleReader > rntreader
void doCallMulti(const std::vector< TestUtils::TestDefinition > &testDefinitions)
the performance data for reading a single branch/column
virtual void setData(ColumnVectorData &columnData) override
std::array< ColumnarOffsetType, 2 > data
virtual BranchPerfData getPerfData(float) override
virtual bool connect(Backend, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
std::vector< ColumnarOffsetType > offsets
Reader< std::vector< std::string > > branchReader
ColumnDataMetNames(const std::string &val_branchName)
virtual BranchPerfData getPerfData(float emptyTime) override
virtual void getEntry(Long64_t entry) override
virtual void setData(ColumnVectorData &colData) override
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &offsetColumns, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
const std::vector< ColumnarOffsetType > * offsetColumn
virtual void setData(ColumnVectorData &columnData) override
ColumnDataOutVector(const std::string &val_columnName, const T &val_defaultValue)
virtual BranchPerfData getPerfData(float) override
std::vector< ColumnarOffsetType > namesOffsets
const std::vector< ColumnarOffsetType > * offsetColumns
virtual BranchPerfData getPerfData(float) override
std::vector< ColumnarOffsetType > offsets
virtual void setData(ColumnVectorData &colData) override
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &offsetColumns, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
ColumnDataOutputMet(const std::string &val_columnName, std::vector< std::string > val_termNames)
virtual void setData(ColumnVectorData &colData) override
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
virtual BranchPerfData getPerfData(float emptyTime) override
ColumnDataSamplingPattern(const std::string &val_branchName)
Reader< xAOD::CaloClusterContainer > branchReader
virtual void getEntry(Long64_t entry) override
virtual void setData(ColumnVectorData &columnData) override
ColumnDataScalar(const std::string &val_branchName)
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
virtual void getEntry(Long64_t entry) override
virtual BranchPerfData getPerfData(float emptyTime) override
ColumnDataVectorVectorVector(const std::string &val_branchName)
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
virtual BranchPerfData getPerfData(float emptyTime) override
Reader< std::vector< std::vector< std::vector< T > > > > branchReader
virtual void setData(ColumnVectorData &colData) override
virtual BranchPerfData getPerfData(float emptyTime) override
Reader< std::vector< std::vector< T > > > branchReader
virtual void setData(ColumnVectorData &colData) override
virtual void getEntry(Long64_t entry) override
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
ColumnDataVectorVector(const std::string &val_branchName)
virtual void getEntry(Long64_t entry) override
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &offsetColumns, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
const std::vector< ColumnarOffsetType > * offsetColumn
std::vector< ColumnarOffsetType > offsets
ColumnDataVector(const std::string &val_branchName)
virtual void setData(ColumnVectorData &columnData) override
virtual BranchPerfData getPerfData(float emptyTime) override
ROOT::Experimental::RNTupleInspector * inspector
std::vector< std::string > metTermNames
the MET output term names (if empty, MET output columns are omitted)
std::vector< std::pair< std::string, std::string > > containerRenames
the container name remappings to apply
static UserConfiguration fromEnvironment()
create a UserConfiguration, loading from the file pointed to by the COLUMNAR_TEST_CONFIG environment ...
TChain * tree