ATLAS Offline Software
Loading...
Searching...
No Matches
ColumnarPhysliteTest.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3*/
4
6
7//
8// includes
9//
10
12
13#include <AsgTesting/UnitTest.h>
34
39
40#ifndef XAOD_STANDALONE
42#endif
43
44#include <TFile.h>
45#include <TLeaf.h>
46#include <TTree.h>
47
48#include "ROOT/RNTuple.hxx"
49#include "ROOT/RNTupleInspector.hxx"
50#include "ROOT/RNTupleReader.hxx"
51#include <ROOT/RNTupleView.hxx>
52
53#include <boost/core/demangle.hpp>
54
55#include <algorithm>
56#include <chrono>
57#include <cstdint>
58#include <format>
59#include <memory>
60#include <span>
61#include <vector>
62
63#include <gtest/gtest.h>
64
65//
66// method implementations
67//
68
69namespace columnar
70{
71 // I'm moving code to this namespace but some of the code in this file
72 // is still just in the columnar namespace. As I evolve the code I'll
73 // move more of it to TestUtils.
74 using namespace TestUtils;
75
76 namespace TestUtils
77 {
78
80 ROOT::RNTupleReader* reader = nullptr;
81 ROOT::Experimental::RNTupleInspector* inspector = nullptr;
82 };
83
84 using Backend = std::variant<TTree*, RNTupleBackend*>;
85 template <typename T>
86 class BranchReader final
87 {
88 std::string m_branchName;
89 TBranch* m_branch = nullptr;
90 bool m_isStatic = std::is_pod_v<T>;
91 T* m_data{new T()};
92
93 public:
94 BranchReader(const std::string& val_branchName)
95 : m_branchName(val_branchName)
96 {
97 if (m_branchName.find("Aux.") != std::string::npos)
98 m_isStatic = true;
99 }
100
101 ~BranchReader() noexcept
102 {
103 delete m_data;
104 }
105
106 BranchReader(const BranchReader&) = delete;
108
109 void setIsStatic(bool isStatic)
110 {
111 m_isStatic = isStatic;
112 }
113
114 [[nodiscard]] const std::string& branchName() const
115 {
116 return m_branchName;
117 }
118
119 [[nodiscard]] std::string columnName() const
120 {
121 std::string columnName = m_branchName;
122 if (auto index = columnName.find("AuxDyn."); index != std::string::npos)
123 columnName.replace(index, 6, "");
124 else if (auto index = columnName.find("Aux."); index != std::string::npos)
125 columnName.replace(index, 3, "");
126 else if (columnName.find(".") != std::string::npos)
127 throw std::runtime_error("branch name does not contain AuxDyn or Aux: " +m_branchName);
128 return columnName;
129 }
130
131 [[nodiscard]] std::string containerName() const
132 {
133 if (auto index = m_branchName.find("AuxDyn."); index != std::string::npos)
134 return m_branchName.substr(0, index);
135 else if (auto index = m_branchName.find("Aux."); index != std::string::npos)
136 return m_branchName.substr(0, index);
137 else if (m_branchName.find(".") == std::string::npos)
138 return m_branchName;
139 else
140 throw std::runtime_error("branch name does not contain AuxDyn or Aux: " +m_branchName);
141 }
142
143 void connectTree(TTree* tree)
144 {
145 m_branch = tree->GetBranch(m_branchName.c_str());
146 if (!m_branch)
147 throw std::runtime_error("failed to get branch: " + m_branchName);
148 m_branch->SetMakeClass(1);
149 if (m_isStatic)
150 m_branch->SetAddress(m_data);
151 else
152 m_branch->SetAddress(&m_data);
153 }
154
155 void connectTree(const Backend& b)
156 {
157 auto* tree = std::get<TTree*>(b); // throws if wrong backend
159 }
160
161 const T& getEntry(Long64_t entry)
162 {
163 if (!m_branch)
164 throw std::runtime_error("branch not connected: " + m_branchName);
165 if (m_branch->GetEntry(entry) <= 0)
166 throw std::runtime_error("failed to get entry " + std::to_string(entry) + " for branch: " + m_branchName);
167 if (m_data == nullptr)
168 throw std::runtime_error("got nullptr reading data for branch: " + m_branchName);
169 return *m_data;
170 }
171
172 const T& getCachedEntry() const
173 {
174 return *m_data;
175 }
176
177 std::optional<float> entrySize() const
178 {
179 if (!m_branch)
180 return std::nullopt;
181 return static_cast<float>(m_branch->GetZipBytes()) / m_branch->GetEntries();
182 }
183
184 std::optional<float> uncompressedSize() const
185 {
186 if (!m_branch)
187 return std::nullopt;
188 return static_cast<float>(m_branch->GetTotBytes()) / m_branch->GetEntries();
189 }
190
191 // technically this is const-correct, but I don't want to convince
192 // the code checker of that
193 std::optional<unsigned> numBaskets()
194 {
195 if (!m_branch)
196 return std::nullopt;
197 return m_branch->GetListOfBaskets()->GetSize();
198 }
199 };
200
201 template <typename T>
202 class BranchReaderArray final
203 {
204 public:
205 std::string m_branchName;
206 TBranch* m_branch = nullptr;
207 std::vector<T> m_dataVec;
208
209 public:
210 BranchReaderArray(const std::string& val_branchName)
211 : m_branchName(val_branchName)
212 {}
213
216
217 [[nodiscard]] std::string columnName() const
218 {
219 std::string columnName = m_branchName;
220 if (auto index = columnName.find("AuxDyn."); index != std::string::npos)
221 columnName.replace(index, 6, "");
222 else if (auto index = columnName.find("Aux."); index != std::string::npos)
223 columnName.replace(index, 3, "");
224 else if (columnName.find(".") != std::string::npos)
225 throw std::runtime_error("branch name does not contain AuxDyn or Aux: " + m_branchName);
226 return columnName;
227 }
228
229 [[nodiscard]] std::string containerName() const
230 {
231 if (auto index = m_branchName.find("AuxDyn."); index != std::string::npos)
232 return m_branchName.substr(0, index);
233 else if (auto index = m_branchName.find("Aux."); index != std::string::npos)
234 return m_branchName.substr(0, index);
235 else if (m_branchName.find(".") == std::string::npos)
236 return m_branchName;
237 else
238 throw std::runtime_error("branch name does not contain AuxDyn or Aux: " + m_branchName);
239 }
240 void connectTree (TTree *tree)
241 {
242 m_branch = tree->GetBranch (m_branchName.c_str());
243 if (!m_branch)
244 throw std::runtime_error ("failed to get branch: " + m_branchName);
245 m_branch->SetMakeClass (1);
246 // FIX ME: I have to have some hard-coded size, see explanation
247 // below.
248 m_dataVec.resize (100);
249 if (!m_dataVec.empty())
250 m_branch->SetAddress (m_dataVec.data());
251 }
252
253 std::span<const T> getEntry (Long64_t entry, std::size_t size)
254 {
255 if (!m_branch)
256 throw std::runtime_error ("branch not connected: " + m_branchName);
257 if (m_dataVec.size() < size)
258 {
259 // FIX ME: in one of the latest releases the repointing below
260 // breaks, and causes memory corruption. so I'm now
261 // preallocating and fail rather than reallocate, and the
262 // problem goes away. maybe it should be investigated at some
263 // point, but this is a test and I already spend a fair amount
264 // of time investigating this. the harm is that this test
265 // consumes a few hundreds bytes more in memory and we may have
266 // to occasionally increase the buffer size to cover all test
267 // files and branch lengths.
268 throw std::runtime_error ("requested size exceeds buffer size for branch: " + m_branchName);
269 // m_dataVec.resize (size);
270 // m_branch->SetAddress (m_dataVec.data());
271 }
272 if (size > 0 && m_branch->GetEntry (entry) <= 0)
273 throw std::runtime_error ("failed to get entry " + std::to_string (entry) + " for branch: " + m_branchName);
274 return std::span<const T>(m_dataVec.data(), size);
275 }
276
277 std::optional<float> entrySize () const
278 {
279 if (!m_branch)
280 return std::nullopt;
281 return static_cast<float>(m_branch->GetZipBytes()) / m_branch->GetEntries();
282 }
283
284 std::optional<float> uncompressedSize () const
285 {
286 if (!m_branch)
287 return std::nullopt;
288 return static_cast<float>(m_branch->GetTotBytes()) / m_branch->GetEntries();
289 }
290
291 // technically this is const-correct, but I don't want to convince
292 // the code checker of that
293 std::optional<unsigned> numBaskets ()
294 {
295 if (!m_branch)
296 return std::nullopt;
297 return m_branch->GetListOfBaskets()->GetSize();
298 }
299 };
300
302 {
305 public:
306
308
310 {
311 if (!m_unknownKeysAllowedTargets.empty())
312 {
313 std::cout << "found unknown keys for " << m_columnName << ":";
314 for (auto& [key, allowedSet] : m_unknownKeysAllowedTargets)
315 {
316 std::cout << " " << std::hex << key << std::dec << " (allowed targets:";
317 for (auto index : allowedSet)
318 std::cout << " " << m_targetNames.at(index);
319 std::cout << ")";
320 }
321 }
322 }
323
324 [[nodiscard]] std::vector<std::string> connect (const ColumnInfo& columnInfo, const std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& offsetColumns, const std::unordered_map<std::string,ColumnInfo>& requestedColumns)
325 {
326 m_columnName = columnInfo.name;
327 std::vector<std::string> keyColumnNames;
328 if (!columnInfo.soleLinkTargetName.empty())
329 {
330 addTarget (columnInfo.soleLinkTargetName, offsetColumns, columnInfo.soleLinkTargetClid);
331 } else
332 {
333 for (auto& [requestedName, requestedInfo] : requestedColumns)
334 {
335 if (requestedInfo.keyColumnForVariantLink == m_columnName)
336 {
337 keyColumnNames.push_back (requestedName);
338 m_keysColumns.emplace_back();
339 for (const auto& targetName : requestedInfo.variantLinkTargetNames)
340 addTarget (targetName, offsetColumns);
341 }
342 }
343 if (m_keysColumns.empty())
344 throw std::runtime_error ("no key column found for variant link: " + m_columnName);
345 }
346 return keyColumnNames;
347 }
348
349 void clear ()
350 {
351 m_columnData.clear();
352 }
353
354 void checkOffsets (unsigned eventIndex)
355 {
356 for (std::size_t i = 0; i < m_targetNames.size(); ++ i)
357 {
358 auto& targetOffsetColumn = *m_targetOffsetColumns.at(i);
359 if (eventIndex + 1 >= targetOffsetColumn.size())
360 throw std::runtime_error ("target offset column not yet filled for: " + m_targetNames.at(i) + " when checking link column " + m_columnName);
361 }
362 }
363
364 template<typename T>
365 void addLink (const ElementLink<T>& element, unsigned eventIndex)
366 {
367 if (element.isDefault())
368 {
369 addEmptyLink();
370 return;
371 }
372
373 addSplitLink (element.index(), element.key(), eventIndex);
374 }
375
377 {
379 }
380
381 void addSplitLink (std::size_t linkIndex, SG::sgkey_t linkKey, unsigned eventIndex)
382 {
383 if (linkIndex == 0 && linkKey == 0)
384 {
385 addEmptyLink();
386 return;
387 }
388
389 unsigned targetIndex = 0u;
390 while (targetIndex < m_targetKeys.size() && m_targetKeys.at(targetIndex) != linkKey)
391 ++ targetIndex;
392
393 // We didn't find the key, so we try to figure out which of the
394 // targets it could be. The idea is that you wouldn't rely on
395 // this for real tests, but that you then go and fill in those
396 // keys in the central lookup table. It will always record and
397 // report, that means if there is a variant link with extra
398 // targets you didn't declare you will get a diagnostic. This
399 // may be overly cautious, but it gives an extra diagnostic if
400 // maybe you missed a target.
401 if (targetIndex == m_targetKeys.size())
402 {
403 if (!m_unknownKeysAllowedTargets.contains (linkKey))
404 {
405 auto& allowedSet = m_unknownKeysAllowedTargets[linkKey];
406 for (std::size_t i = 0; i < m_targetKeys.size(); ++ i)
407 {
408 if (m_targetKeys.at(i) == 0)
409 allowedSet.insert(i);
410 }
411 }
412 auto& allowedSet = m_unknownKeysAllowedTargets[linkKey];
413 for (auto iter = allowedSet.begin(); iter != allowedSet.end();)
414 {
415 auto index = *iter;
416 auto& targetOffsetColumn = *m_targetOffsetColumns.at(index);
417 if (eventIndex + 1 >= targetOffsetColumn.size())
418 throw std::runtime_error ("target offset column not yet filled for: " + m_targetNames.at(index));
419 if (targetOffsetColumn.at(eventIndex) + linkIndex >= targetOffsetColumn.at(eventIndex + 1))
420 iter = allowedSet.erase(iter);
421 else
422 ++ iter;
423 }
424 // Not quite sure whether it is safer to use or not use one of
425 // the targets from the allowed set in this case. In general
426 // tools are expected to handle invalid links gracefully,
427 // worst case they throw an exception when trying to access
428 // it. So what I came up with is that for variant links we
429 // assume it invalid, but for non-variant links the tool
430 // expects exactly one target and we either found it or throw
431 // an exception.
432 if (m_keysColumns.empty())
433 {
434 if (allowedSet.size() == 1 && m_targetKeys.at(*allowedSet.begin()) == 0 && m_unknownKeysAllowedTargets.size() == 1)
435 targetIndex = *allowedSet.begin();
436 else
437 {
438 std::ostringstream error;
439 error << "target key mismatch: read sgkey " << std::hex << linkKey << std::dec;
440 error << " for column " << m_columnName << " with element index " << linkIndex << " targeting " << m_targetNames.at(0);
441 if (m_targetKeys.at(0) != 0u)
442 {
443 error << ", expected sgkey " << std::hex << m_targetKeys.at(0) << std::dec;
444 } else if (m_unknownKeysAllowedTargets.size() > 1)
445 {
446 error << ", alternate key found for non-variant link:";
447 for (auto& [key, allowedSet] : m_unknownKeysAllowedTargets)
448 {
449 if (key != linkKey)
450 error << " " << std::hex << key << std::dec;
451 }
452 } else
453 {
454 error << ", no expected sgkey configured but the maximum allowed index for the target is " << m_targetOffsetColumns.at(0)->at(eventIndex + 1) - m_targetOffsetColumns.at(0)->at(eventIndex) - 1;
455 }
456 throw std::runtime_error (std::move (error).str());
457 }
458 }
459 }
460
461 if (targetIndex == m_targetKeys.size())
462 {
463 // this creates a link with an unknown key, which the user
464 // will ignore
465 m_columnData.push_back (CM::mergeLinkKeyIndex (0xff, linkIndex));
466 return;
467 }
468
469 auto& targetOffsetColumn = *m_targetOffsetColumns.at(targetIndex);
470 if (eventIndex + 1 >= targetOffsetColumn.size())
471 throw std::runtime_error ("target offset column not yet filled for: " + m_targetNames.at(targetIndex));
472 auto myLinkIndex = linkIndex + targetOffsetColumn.at(eventIndex);
473 if (myLinkIndex >= targetOffsetColumn.at(eventIndex + 1))
474 throw std::runtime_error ("index out of range for link: " + m_columnName + " with element index " + std::to_string(linkIndex) + " targeting " + m_targetNames.at(targetIndex) + " with offset " + std::to_string(targetOffsetColumn.at(eventIndex)) + " and next offset " + std::to_string(targetOffsetColumn.at(eventIndex + 1)));
475
476 m_columnData.push_back (CM::mergeLinkKeyIndex (targetIndex, myLinkIndex));
477 }
478
479 [[nodiscard]] std::size_t size () const noexcept
480 {
481 return m_columnData.size();
482 }
483
484 [[nodiscard]] const typename CM::LinkIndexType* data () const noexcept
485 {
486 return m_columnData.data();
487 }
488
489 [[nodiscard]] auto begin () const noexcept { return m_columnData.begin(); }
490 [[nodiscard]] auto end () const noexcept { return m_columnData.end(); }
491
492 [[nodiscard]] const std::vector<typename CM::LinkKeyType>& keysColumn (std::size_t index) const
493 {
494 return m_keysColumns.at(index);
495 }
496
497
498
501 private:
502
503 std::vector<typename CM::LinkIndexType> m_columnData;
504
505 std::string m_columnName;
506
507 std::vector<std::string> m_targetNames;
508 std::vector<SG::sgkey_t> m_targetKeys;
509 std::vector<const std::vector<ColumnarOffsetType>*> m_targetOffsetColumns;
510
511 // there can be multiple keys-columns, hence this is a vector of
512 // vectors. if this is empty, then it is a single-target link
513 std::vector<std::vector<typename CM::LinkKeyType>> m_keysColumns;
514
515 std::unordered_map<SG::sgkey_t,std::unordered_set<std::size_t>> m_unknownKeysAllowedTargets;
516
517
518
519 void addTarget (const std::string& name, const std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& offsetColumns, std::uint32_t clid = 0)
520 {
521 unsigned targetIndex = 0;
522 while (targetIndex < m_targetNames.size() && m_targetNames.at(targetIndex) != name)
523 ++ targetIndex;
524 if (targetIndex == m_targetNames.size())
525 {
526 m_targetNames.push_back(name);
527 if (auto offsetIter = offsetColumns.find (name); offsetIter != offsetColumns.end())
528 m_targetOffsetColumns.push_back (offsetIter->second);
529 else
530 throw std::runtime_error ("missing offset column: " + name);
531 if (clid != 0)
532 m_targetKeys.push_back (computeSgKey (name, clid));
533 else if (auto keyIter = knownSgKeys.find (name); keyIter != knownSgKeys.end())
534 m_targetKeys.push_back (keyIter->second);
535 else
536 m_targetKeys.push_back (0);
537 }
538 if (!m_keysColumns.empty())
539 m_keysColumns.back().push_back (targetIndex);
540 }
541 };
542
543 template <typename T>
544 class RNTFieldReader final
545 {
546 std::string m_FieldName;
547 std::unique_ptr<ROOT::RNTupleView<T>> m_view;
548 ROOT::Experimental::RNTupleInspector* m_inspector = nullptr;
549 ROOT::RNTupleReader* m_reader = nullptr;
550 const T* m_data = nullptr;
551
552 public:
553 RNTFieldReader(const std::string& val_fieldName)
554 : m_FieldName(val_fieldName)
555 {}
556
557 ~RNTFieldReader() noexcept {}
560
561 [[nodiscard]] const std::string& fieldName() const
562 {
563 return m_FieldName;
564 }
565
566 [[nodiscard]] std::string columnName() const
567 {
568 std::string columnName = m_FieldName;
569 if (auto index = columnName.find("AuxDyn:"); index != std::string::npos)
570 columnName.replace(index, 6, "");
571 else if (auto index = columnName.find("Aux:."); index != std::string::npos)
572 columnName.replace(index, 4, "");
573 else if (auto index = columnName.find("Aux:"); index != std::string::npos)
574 columnName.replace(index, 3, "");
575 else if (columnName.find(":") != std::string::npos)
576 throw std::runtime_error("field name does not contain AuxDyn or Aux: " + m_FieldName);
577 std::replace(columnName.begin(), columnName.end(), ':', '.');
578
579 return columnName;
580 }
581
582 [[nodiscard]] std::string containerName() const
583 {
584 if (auto index = m_FieldName.find("AuxDyn:"); index != std::string::npos)
585 return m_FieldName.substr(0, index);
586 else if (auto index = m_FieldName.find("Aux:"); index != std::string::npos)
587 return m_FieldName.substr(0, index);
588 else if (m_FieldName.find(":") == std::string::npos)
589 return m_FieldName;
590 else
591 throw std::runtime_error("field name does not contain AuxDyn or Aux: " + m_FieldName);
592 }
593
594 void connectRNTuple(ROOT::RNTupleReader* reader,
595 ROOT::Experimental::RNTupleInspector* inspector)
596 {
597 m_inspector = inspector;
598 m_reader = reader;
599 m_view = std::make_unique<ROOT::RNTupleView<T>>(reader->GetView<T>(m_FieldName));
600
601 if (!m_view)
602 throw std::runtime_error("failed to get field: " + m_FieldName);
603 }
604
605 void connectTree(const Backend& b)
606 {
607 auto* rntbackend = std::get<RNTupleBackend*>(b); // throws if wrong backend
608
609 if (!rntbackend->reader || !rntbackend->inspector)
610 throw std::runtime_error("RNTuple backend not properly initialized");
611 connectRNTuple(rntbackend->reader, rntbackend->inspector);
612 }
613
614
615 const T& getEntry(Long64_t entry)
616 {
617 if (!m_view)
618 throw std::runtime_error("field not connected: " + m_FieldName);
619
620 m_data = &((*m_view)(static_cast<ROOT::NTupleSize_t>(entry)));
621
622 if (m_data == nullptr)
623 throw std::runtime_error("got nullptr reading data for field: " + m_FieldName);
624 return *m_data;
625 }
626
627 const T& getCachedEntry() const
628 {
629 return *m_data;
630 }
631
632 std::optional<float> entrySize() const
633 {
634
635 const ROOT::Experimental::RNTupleInspector::RFieldTreeInspector& fieldTreeInspector = m_inspector->GetFieldTreeInspector(m_FieldName);
636 return static_cast<float>(fieldTreeInspector.GetCompressedSize()) /
637 m_inspector->GetDescriptor().GetNEntries();
638 }
639
640 std::optional<float> uncompressedSize() const
641 {
642
643 const ROOT::Experimental::RNTupleInspector::RFieldTreeInspector& fieldTreeInspector = m_inspector->GetFieldTreeInspector(m_FieldName);
644
645 return static_cast<float>(fieldTreeInspector.GetUncompressedSize()) /
646 m_inspector->GetDescriptor().GetNEntries();
647 }
648
649 std::optional<unsigned> numBaskets()
650 {
651 // placeholder
652 return std::nullopt;
653 }
654 };
655
657 {
658 public:
659
661 {
662 std::string name;
663 bool isOffset = false;
664 bool primary = false;
665 bool enabled = false;
667 };
668 std::vector<OutputColumnInfo> outputColumns;
669
670 virtual ~IColumnData () noexcept = default;
671
672 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) = 0;
673
676 {
677 for (auto& col : outputColumns)
678 {
679 if (col.enabled)
680 col.columnIndex = header.getColumnIndex (col.name);
681 }
682 }
683
684 virtual void clearColumns () = 0;
685
686 virtual void getEntry (Long64_t entry) = 0;
687
688 virtual void setData (ColumnVectorData& columnData) = 0;
689
690 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) = 0;
691
692 virtual void collectColumnData () = 0;
693 };
694
696 {
697 std::array<ColumnarOffsetType, 2> data = {0, 0};
698
700 {
701 outputColumns.push_back ({.name = eventRangeColumnName, .isOffset = true});
702 }
703
704 virtual bool connect(Backend /*source*/, std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& /*offsetColumns*/,std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
705 {
706 if (requestedColumns.contains(outputColumns.at(0).name))
707 {
708 requestedColumns.erase(outputColumns.at(0).name);
709 outputColumns.at(0).enabled = true;
710 return true;
711 }
712 return false;
713 }
714 virtual void clearColumns () override
715 {
716 data[0] = 0;
717 data[1] = 0;
718 }
719
720 virtual void getEntry (Long64_t /*entry*/) override
721 {
722 data[1] += 1;
723 }
724
725 virtual void setData (ColumnVectorData& columnData) override
726 {
727 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
728 columnData.setColumn (outputColumns.at(0).columnIndex, data.size(), data.data());
729 }
730
731 [[nodiscard]] virtual BranchPerfData getPerfData (float /*emptyTime*/) override
732 {
733 BranchPerfData result;
734 result.name = "EventCount(auto)";
735 return result;
736 }
737
738 virtual void collectColumnData () override
739 {}
740 };
741
742
743 template <typename T, template <typename> class Reader>
745 {
746 Reader<T> branchReader;
749 std::vector<T> outData;
750 unsigned entries = 0;
751
752 explicit ColumnDataScalar (const std::string& val_branchName)
753 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
754 {
755 outputColumns.push_back ({.name = branchReader.columnName()});
756 }
757
758 virtual bool connect( Backend source, std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& /*offsetColumns*/,std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
759 {
760 auto iter = requestedColumns.find (outputColumns.at(0).name);
761 if (iter == requestedColumns.end())
762 return false;
763 outputColumns.at(0).enabled = true;
764 requestedColumns.erase (iter);
765
766 branchReader.connectTree (source);
767
768 return true;
769 }
770 virtual void clearColumns () override
771 {
772 outData.clear ();
773 }
774
775 virtual void getEntry (Long64_t entry) override
776 {
777 benchmark.startTimer ();
778 const auto& branchData = branchReader.getEntry (entry);
779 benchmark.stopTimer ();
780 benchmarkUnpack.startTimer ();
781 outData.push_back (branchData);
782 benchmarkUnpack.stopTimer ();
783 }
784
785 virtual void setData (ColumnVectorData& columnData) override
786 {
787 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
788 columnData.setColumn (outputColumns.at(0).columnIndex, outData.size(), outData.data());
789 }
790
791 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
792 {
793 BranchPerfData result;
794 result.name = branchReader.columnName();
795 result.timeRead = benchmark.getEntryTime(emptyTime);
796 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
797 benchmark.setSilence();
798 benchmarkUnpack.setSilence();
799 result.entrySize = branchReader.entrySize();
800 result.uncompressedSize = branchReader.uncompressedSize();
801 result.numBaskets = branchReader.numBaskets();
802 result.entries = entries;
803 return result;
804 }
805
806 virtual void collectColumnData () override
807 {
808 entries += outData.size();
809 }
810 };
811
812 template <typename T, template <typename> class Reader>
814 {
815 Reader<std::vector<T>> branchReader;
816 const std::vector<ColumnarOffsetType>* offsetColumn = nullptr;
817 std::vector<ColumnarOffsetType> offsets = {0};
818 std::vector<T> outData;
821 unsigned entries = 0;
822
823 explicit ColumnDataVector (const std::string& val_branchName)
824 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
825 {
826 outputColumns.push_back ({.name = branchReader.columnName()});
827 outputColumns.push_back ({.name = branchReader.containerName(), .isOffset = true, .primary = false});
828 }
829
830 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
831 {
832 auto iter = requestedColumns.find (outputColumns.at(0).name);
833 if (iter == requestedColumns.end())
834 return false;
835 outputColumns.at(0).enabled = true;
836
837 branchReader.connectTree(source);
838
839 if (iter->second.offsetName != outputColumns.at(1).name)
840 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
841
842 requestedColumns.erase (iter);
843
844 if (auto offsetIter = offsetColumns.find (outputColumns.at(1).name); offsetIter != offsetColumns.end())
845 offsetColumn = offsetIter->second;
846 else
847 offsetColumns.emplace (outputColumns.at(1).name, &offsets);
848
849 iter = requestedColumns.find (outputColumns.at(1).name);
850 if (iter != requestedColumns.end())
851 {
852 requestedColumns.erase (iter);
853 outputColumns.at(1).enabled = true;
854 }
855
856 return true;
857 }
858
859 virtual void clearColumns () override
860 {
861 offsets.clear ();
862 offsets.push_back (0);
863 outData.clear ();
864 }
865
866 virtual void getEntry (Long64_t entry) override
867 {
868 benchmark.startTimer ();
869 const auto& branchData = branchReader.getEntry (entry);
870 benchmark.stopTimer ();
871 benchmarkUnpack.startTimer ();
872 outData.insert (outData.end(), branchData.begin(), branchData.end());
873 offsets.push_back (outData.size());
874 benchmarkUnpack.stopTimer ();
875 }
876
877 virtual void setData (ColumnVectorData& columnData) override
878 {
879 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
880 columnData.setColumn (outputColumns.at(0).columnIndex, outData.size(), outData.data());
881 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
882 columnData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
883 if (offsetColumn)
884 {
885 if (offsetColumn->size() != offsets.size())
886 throw std::runtime_error ("offset column not filled yet: " + outputColumns.at(1).name);
887 if (offsetColumn->back() != offsets.back())
888 throw std::runtime_error ("offset column does not match: " + outputColumns.at(1).name);
889 }
890 }
891
892 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
893 {
894 BranchPerfData result;
895 result.name = branchReader.columnName();
896 result.timeRead = benchmark.getEntryTime(emptyTime);
897 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
898 benchmark.setSilence();
899 benchmarkUnpack.setSilence();
900 result.entrySize = branchReader.entrySize();
901 result.uncompressedSize = branchReader.uncompressedSize();
902 result.numBaskets = branchReader.numBaskets();
903 result.entries = entries;
904 return result;
905 }
906
907 virtual void collectColumnData () override
908 {
909 entries += outData.size();
910 }
911 };
912
913 template <typename T, template <typename> class Reader>
915 {
917 const std::vector<ColumnarOffsetType>* offsetColumn = nullptr;
918 std::vector<T> outData;
919 unsigned entries = 0;
920
921 ColumnDataOutVector (const std::string& val_columnName, const T& val_defaultValue)
922 : defaultValue (val_defaultValue)
923 {
924 outputColumns.push_back ({.name = val_columnName});
925 }
926
927 virtual bool connect([[maybe_unused]]Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
928 {
929 auto iter = requestedColumns.find (outputColumns.at(0).name);
930 if (iter == requestedColumns.end())
931 return false;
932 outputColumns.at(0).enabled = true;
933
934 // WARNING: absolutely do not switch the next line to a
935 // reference, the pointed to element gets deleted below.
936 const auto offsetName = iter->second.offsetName;
937 if (offsetName.empty())
938 throw std::runtime_error ("missing offset column for: " + outputColumns.at(0).name);
939
940 requestedColumns.erase (iter);
941
942 if (auto offsetIter = offsetColumns.find (offsetName); offsetIter != offsetColumns.end())
943 offsetColumn = offsetIter->second;
944 else
945 throw std::runtime_error ("missing offset column for: " + outputColumns.at(0).name);
946 return true;
947 }
948
949 virtual void clearColumns () override
950 {
951 outData.clear ();
952 }
953
954 virtual void getEntry (Long64_t /*entry*/) override
955 {
956 outData.resize (offsetColumn->back(), defaultValue);
957 }
958
959 virtual void setData (ColumnVectorData& columnData) override
960 {
961 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
962 columnData.setColumn (outputColumns.at(0).columnIndex, outData.size(), outData.data());
963 }
964
965 [[nodiscard]] virtual BranchPerfData getPerfData (float /*emptyTime*/) override
966 {
967 BranchPerfData result;
968 result.name = outputColumns.at(0).name + "(out)";
969 result.entries = entries;
970 return result;
971 }
972
973 virtual void collectColumnData () override
974 {
975 entries += outData.size();
976 }
977 };
978
979 template <typename T, template <typename> class Reader>
981 {
982 Reader<std::vector<std::vector<T>>> branchReader;
983 std::vector<ColumnarOffsetType> offsets = {0};
984 std::vector<T> columnData;
987 unsigned entries = 0;
988
989 explicit ColumnDataVectorVector (const std::string& val_branchName)
990 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
991 {
992 outputColumns.push_back ({.name = branchReader.columnName() + ".data"});
993 outputColumns.push_back ({.name = branchReader.columnName() + ".offset", .isOffset = true});
994 }
995
996 virtual bool connect(Backend source, std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& /*offsetColumns*/, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
997 {
998 auto iter = requestedColumns.find (outputColumns.at(0).name);
999 if (iter == requestedColumns.end())
1000 return false;
1001 outputColumns.at(0).enabled = true;
1002
1003 branchReader.connectTree(source);
1004
1005 if (iter->second.offsetName != outputColumns.at(1).name)
1006 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1007
1008 requestedColumns.erase (iter);
1009
1010 iter = requestedColumns.find (outputColumns.at(1).name);
1011 if (iter == requestedColumns.end())
1012 return true;
1013 requestedColumns.erase (iter);
1014 outputColumns.at(1).enabled = true;
1015 return true;
1016 }
1017
1018 virtual void clearColumns () override
1019 {
1020 columnData.clear();
1021 offsets.clear();
1022 offsets.push_back (0);
1023 }
1024
1025 virtual void getEntry (Long64_t entry) override
1026 {
1027 benchmark.startTimer ();
1028 const auto& branchData = branchReader.getEntry (entry);
1029 benchmark.stopTimer ();
1030 benchmarkUnpack.startTimer ();
1031 for (auto& data : branchData)
1032 {
1033 columnData.insert (columnData.end(), data.begin(), data.end());
1034 offsets.push_back (columnData.size());
1035 }
1036 benchmarkUnpack.stopTimer ();
1037 }
1038
1039 virtual void setData (ColumnVectorData& colData) override
1040 {
1041 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1042 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1043 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1044 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1045 }
1046
1047 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1048 {
1049 BranchPerfData result;
1050 result.name = branchReader.columnName();
1051 result.timeRead = benchmark.getEntryTime(emptyTime);
1052 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1053 benchmark.setSilence();
1054 benchmarkUnpack.setSilence();
1055 result.entrySize = branchReader.entrySize();
1056 result.uncompressedSize = branchReader.uncompressedSize();
1057 result.numBaskets = branchReader.numBaskets();
1058 result.entries = entries;
1059 return result;
1060 }
1061
1062 virtual void collectColumnData () override
1063 {
1064 entries += columnData.size();
1065 }
1066 };
1067
1068 template <typename T, template <typename> class Reader>
1070 {
1072 Reader<std::vector<std::vector<ElementLink<T>>>> branchReader;
1073 std::vector<ColumnarOffsetType> offsets = {0};
1074 std::vector<ColumnarOffsetType> eventOffsets = {0};
1078 unsigned entries = 0;
1079 unsigned nullEntries = 0;
1080
1081 explicit ColumnDataVectorVectorLink (const std::string& val_branchName)
1082 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
1083 {
1084 outputColumns.push_back ({.name = branchReader.columnName() + ".data"});
1085 outputColumns.push_back ({.name = branchReader.columnName() + ".offset", .isOffset = true});
1086 }
1087
1088 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1089 {
1090 auto iter = requestedColumns.find (outputColumns.at(0).name);
1091 if (iter == requestedColumns.end())
1092 return false;
1093 outputColumns.at(0).enabled = true;
1094
1095 branchReader.connectTree(source);
1096
1097 if (iter->second.offsetName != outputColumns.at(1).name)
1098 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1099 for (auto keyColumn : columnData.connect (iter->second, offsetColumns, requestedColumns))
1100 {
1101 outputColumns.push_back ({.name = keyColumn, .primary = false, .enabled = true});
1102 requestedColumns.erase (keyColumn);
1103 }
1104
1105 requestedColumns.erase (iter);
1106
1107 iter = requestedColumns.find (outputColumns.at(1).name);
1108 if (iter == requestedColumns.end())
1109 return true;
1110 requestedColumns.erase (iter);
1111 outputColumns.at(1).enabled = true;
1112 return true;
1113 }
1114
1115 virtual void clearColumns () override
1116 {
1117 columnData.clear();
1118 offsets.clear();
1119 offsets.push_back (0);
1120 eventOffsets.clear();
1121 eventOffsets.push_back (0);
1122 }
1123
1124 virtual void getEntry (Long64_t entry) override
1125 {
1126 benchmark.startTimer ();
1127 const auto& branchData = branchReader.getEntry (entry);
1128 benchmark.stopTimer ();
1129 benchmarkUnpack.startTimer ();
1130 columnData.checkOffsets (eventOffsets.size() - 1);
1131 for (auto& data : branchData)
1132 {
1133 for (auto& element : data)
1134 {
1135 columnData.addLink (element, eventOffsets.size()-1);
1136 }
1137 offsets.push_back (columnData.size());
1138 }
1139 eventOffsets.push_back (offsets.size());
1140 benchmarkUnpack.stopTimer ();
1141 }
1142
1143 virtual void setData (ColumnVectorData& colData) override
1144 {
1145 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1146 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1147 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1148 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1149 for (std::size_t i = 2; i < outputColumns.size(); ++ i)
1150 {
1151 if (outputColumns.at(i).columnIndex != ColumnVectorHeader::nullIndex)
1152 colData.setColumn (outputColumns.at(i).columnIndex, columnData.keysColumn(i-2).size(), columnData.keysColumn(i-2).data());
1153 }
1154 }
1155
1156 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1157 {
1158 BranchPerfData result;
1159 result.name = branchReader.columnName();
1160 result.timeRead = benchmark.getEntryTime(emptyTime);
1161 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1162 benchmark.setSilence();
1163 benchmarkUnpack.setSilence();
1164 result.entrySize = branchReader.entrySize();
1165 result.uncompressedSize = branchReader.uncompressedSize();
1166 result.numBaskets = branchReader.numBaskets();
1167 result.entries = entries;
1168 result.nullEntries = nullEntries;
1169 return result;
1170 }
1171
// Adds the current size of columnData to `entries` and walks the elements of
// columnData to update `nullEntries`.
1172 virtual void collectColumnData () override
1173 {
1174 entries += columnData.size();
1175 for (const auto& index : columnData)
1176 {
// NOTE(review): listing line 1177 is absent from this copy; presumably it is
// a condition on `index` guarding the increment below -- confirm against the
// original file. As shown here every element would be counted.
1178 nullEntries += 1;
1179 }
1180 }
1181 };
1182
1183 template <typename T, template <typename> class Reader>
1185 {
1186 std::string columnName;
1187 Reader<std::vector<std::vector<std::vector<T>>>> branchReader;
1188 std::vector<ColumnarOffsetType> outerOffsets = {0};
1189 std::vector<ColumnarOffsetType> innerOffsets = {0};
1190 std::vector<T> columnData;
1193 unsigned entries = 0;
1194
// Registers the three output columns of this reader: ".data", ".innerOffset"
// and ".outerOffset", all prefixed with the branch reader's column name.
// NOTE(review): listing line 1196 is absent from this copy; presumably it is
// the member initializer list that passes val_branchName to branchReader and
// sets up the benchmarks -- confirm against the original file.
1195 explicit ColumnDataVectorVectorVector (const std::string& val_branchName)
1197 {
1198 outputColumns.push_back ({.name = branchReader.columnName() + ".data"});
1199 outputColumns.push_back ({.name = branchReader.columnName() + ".innerOffset", .isOffset = true});
1200 outputColumns.push_back ({.name = branchReader.columnName() + ".outerOffset", .isOffset = true});
1201 }
1202
1203 virtual bool connect(Backend source, std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& /*offsetColumns*/, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1204 {
1205 auto iter = requestedColumns.find (outputColumns.at(0).name);
1206 if (iter == requestedColumns.end())
1207 return false;
1208 outputColumns.at(0).enabled = true;
1209
1210 branchReader.connectTree(source);
1211
1212 if (iter->second.offsetName != outputColumns.at(1).name)
1213 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1214
1215 requestedColumns.erase (iter);
1216
1217 iter = requestedColumns.find (outputColumns.at(1).name);
1218 if (iter == requestedColumns.end())
1219 return true;
1220 outputColumns.at(1).enabled = true;
1221
1222 if (iter->second.offsetName != outputColumns.at(2).name)
1223 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(2).name);
1224
1225 requestedColumns.erase (iter);
1226
1227 iter = requestedColumns.find (outputColumns.at(2).name);
1228 if (iter == requestedColumns.end())
1229 return true;
1230 outputColumns.at(2).enabled = true;
1231 requestedColumns.erase (iter);
1232 return true;
1233 }
1234
1235 virtual void clearColumns () override
1236 {
1237 columnData.clear();
1238 innerOffsets.clear();
1239 innerOffsets.push_back (0);
1240 outerOffsets.clear();
1241 outerOffsets.push_back (0);
1242 }
1243
1244 virtual void getEntry (Long64_t entry) override
1245 {
1246 benchmark.startTimer ();
1247 const auto& branchData = branchReader.getEntry (entry);
1248 benchmark.stopTimer ();
1249 benchmarkUnpack.startTimer ();
1250 for (auto& outerData : branchData)
1251 {
1252 for (auto& innerData : outerData)
1253 {
1254 columnData.insert (columnData.end(), innerData.begin(), innerData.end());
1255 innerOffsets.push_back (columnData.size());
1256 }
1257 outerOffsets.push_back (innerOffsets.size()-1);
1258 }
1259 benchmarkUnpack.stopTimer ();
1260 }
1261
1262 virtual void setData (ColumnVectorData& colData) override
1263 {
1264 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1265 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1266 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1267 colData.setColumn (outputColumns.at(1).columnIndex, innerOffsets.size(), innerOffsets.data());
1268 if (outputColumns.at(2).columnIndex != ColumnVectorHeader::nullIndex)
1269 colData.setColumn (outputColumns.at(2).columnIndex, outerOffsets.size(), outerOffsets.data());
1270 }
1271
1272 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1273 {
1274 BranchPerfData result;
1275 result.name = branchReader.columnName();
1276 result.timeRead = benchmark.getEntryTime(emptyTime);
1277 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1278 benchmark.setSilence();
1279 benchmarkUnpack.setSilence();
1280 result.entrySize = branchReader.entrySize();
1281 result.uncompressedSize = branchReader.uncompressedSize();
1282 result.numBaskets = branchReader.numBaskets();
1283 result.entries = entries;
1284 return result;
1285 }
1286
1287 virtual void collectColumnData () override
1288 {
1289 entries += columnData.size();
1290 }
1291 };
1292
1293 template <typename T, template <typename> class Reader>
1295 {
1297 Reader<std::vector<ElementLink<T>>> branchReader;
1298 const std::vector<ColumnarOffsetType>* offsetColumn = nullptr;
1299 std::vector<ColumnarOffsetType> offsets = {0};
1303 unsigned entries = 0;
1304 unsigned nullEntries = 0;
1305
1306 ColumnDataVectorLink (const std::string& val_branchName)
1307 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
1308 {
1309 outputColumns.push_back ({.name = branchReader.columnName()});
1310 outputColumns.push_back ({.name = branchReader.containerName(), .isOffset = true, .primary = false});
1311 }
1312
1313
1314 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1315 {
1316 auto iter = requestedColumns.find (outputColumns.at(0).name);
1317 if (iter == requestedColumns.end())
1318 return false;
1319 outputColumns.at(0).enabled = true;
1320
1321 branchReader.connectTree(source);
1322
1323 if (iter->second.offsetName != outputColumns.at(1).name)
1324 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1325 for (auto keyColumn : columnData.connect (iter->second, offsetColumns, requestedColumns))
1326 {
1327 outputColumns.push_back ({.name = keyColumn, .primary = false, .enabled = true});
1328 requestedColumns.erase (keyColumn);
1329 }
1330
1331 requestedColumns.erase (iter);
1332
1333 if (auto offsetIter = offsetColumns.find (outputColumns.at(1).name); offsetIter != offsetColumns.end())
1334 offsetColumn = offsetIter->second;
1335 else
1336 offsetColumns.emplace (outputColumns.at(1).name, &offsets);
1337
1338 iter = requestedColumns.find (outputColumns.at(1).name);
1339 if (iter != requestedColumns.end())
1340 {
1341 outputColumns.at(1).enabled = true;
1342 requestedColumns.erase (iter);
1343 }
1344
1345 return true;
1346 }
1347
1348 virtual void clearColumns () override
1349 {
1350 columnData.clear();
1351 offsets.clear();
1352 offsets.push_back (0);
1353 }
1354
1355 virtual void getEntry (Long64_t entry) override
1356 {
1357 benchmark.startTimer ();
1358 const auto& branchData = branchReader.getEntry (entry);
1359 benchmark.stopTimer ();
1360 benchmarkUnpack.startTimer ();
1361 columnData.checkOffsets (offsets.size() - 1);
1362 for (auto& element : branchData)
1363 columnData.addLink (element, offsets.size()-1);
1364 offsets.push_back (columnData.size());
1365 if (offsetColumn)
1366 {
1367 if (offsetColumn->size() != offsets.size())
1368 throw std::runtime_error ("offset column not filled yet: " + outputColumns.at(1).name);
1369 if (offsetColumn->back() != offsets.back())
1370 throw std::runtime_error ("offset column does not match: " + outputColumns.at(1).name);
1371 }
1372 benchmarkUnpack.stopTimer ();
1373 }
1374
1375 virtual void setData (ColumnVectorData& colData) override
1376 {
1377 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1378 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1379 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1380 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1381 for (std::size_t i = 2; i < outputColumns.size(); ++ i)
1382 {
1383 if (outputColumns.at(i).columnIndex != ColumnVectorHeader::nullIndex)
1384 colData.setColumn (outputColumns.at(i).columnIndex, columnData.keysColumn(i-2).size(), columnData.keysColumn(i-2).data());
1385 }
1386 }
1387
1388 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1389 {
1390 BranchPerfData result;
1391 result.name = branchReader.columnName();
1392 result.timeRead = benchmark.getEntryTime(emptyTime);
1393 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1394 benchmark.setSilence();
1395 benchmarkUnpack.setSilence();
1396 result.entrySize = branchReader.entrySize();
1397 result.uncompressedSize = branchReader.uncompressedSize();
1398 result.numBaskets = branchReader.numBaskets();
1399 result.entries = entries;
1400 result.nullEntries = nullEntries;
1401 return result;
1402 }
1403
// Adds the current size of columnData to `entries` and walks the elements of
// columnData to update `nullEntries`.
1404 virtual void collectColumnData () override
1405 {
1406 entries += columnData.size();
1407 for (const auto& index : columnData)
1408 {
// NOTE(review): listing line 1409 is absent from this copy; presumably it is
// a condition on `index` guarding the increment below -- confirm against the
// original file. As shown here every element would be counted.
1410 nullEntries += 1;
1411 }
1412 }
1413 };
1414
1415 template <typename T, template <typename> class Reader>
1417 {
1419 Reader<std::vector<ElementLink<T>>> branchReader;
1420 const std::vector<ColumnarOffsetType>* offsetColumn = nullptr;
1421 std::vector<ColumnarOffsetType> offsets = {0};
1425 unsigned entries = 0;
1426 unsigned nullEntries = 0;
1427
1428 ColumnDataVectorRLink(const std::string& val_branchName)
1429 : branchReader(val_branchName), benchmarkUnpack(branchReader.columnName() + "(unpack)"), benchmark(branchReader.columnName())
1430 {
1431 outputColumns.push_back({.name = branchReader.columnName()});
1432 outputColumns.push_back({.name = branchReader.containerName(), .isOffset = true, .primary = false});
1433 }
1434
1435 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1436 {
1437 auto iter = requestedColumns.find(outputColumns.at(0).name);
1438 if (iter == requestedColumns.end())
1439 return false;
1440 outputColumns.at(0).enabled = true;
1441
1442 branchReader.connectTree(source);
1443 if (iter->second.offsetName != outputColumns.at(1).name)
1444 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1445
1446 for (auto keyColumn : columnData.connect (iter->second, offsetColumns, requestedColumns))
1447 {
1448 outputColumns.push_back ({.name = keyColumn, .primary = false, .enabled = true});
1449 requestedColumns.erase (keyColumn);
1450 }
1451 requestedColumns.erase (iter);
1452
1453 if (auto offsetIter = offsetColumns.find (outputColumns.at(1).name); offsetIter != offsetColumns.end())
1454 offsetColumn = offsetIter->second;
1455 else
1456 offsetColumns.emplace (outputColumns.at(1).name, &offsets);
1457
1458 iter = requestedColumns.find (outputColumns.at(1).name);
1459 if (iter != requestedColumns.end())
1460 {
1461 outputColumns.at(1).enabled = true;
1462 requestedColumns.erase (iter);
1463 }
1464
1465 return true;
1466 }
1467
1468 virtual void clearColumns() override
1469 {
1470 columnData.clear();
1471 offsets.clear();
1472 offsets.push_back(0);
1473 }
1474
1475 virtual void getEntry(Long64_t entry) override
1476 {
1477 benchmark.startTimer();
1478 const auto& branchData = branchReader.getEntry(entry);
1479 benchmark.stopTimer();
1480 benchmarkUnpack.startTimer();
1481
1482 columnData.checkOffsets (offsets.size() - 1);
1483 for (const auto& element : branchData)
1484 {
1485 if (element.isDefault() || element.index() == static_cast<unsigned int>(-1))
1486 columnData.addEmptyLink ();
1487 else
1488 columnData.addSplitLink (element.index(), element.key(), offsets.size()-1);
1489 }
1490
1491
1492 offsets.push_back(columnData.size());
1493
1494 if (offsetColumn) {
1495 if (offsetColumn->size() != offsets.size())
1496 {
1497 throw std::runtime_error("offset column not filled yet: " + outputColumns.at(1).name);
1498 }
1499 if (offsetColumn->back() != offsets.back())
1500 {
1501 throw std::runtime_error("offset column does not match: " + outputColumns.at(1).name);
1502 }
1503 }
1504
1505 benchmarkUnpack.stopTimer();
1506 }
1507
1508 virtual void setData(ColumnVectorData& colData) override
1509 {
1510 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1511 colData.setColumn(outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1512 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1513 colData.setColumn(outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1514 for (std::size_t i = 2; i < outputColumns.size(); ++ i)
1515 {
1516 if (outputColumns.at(i).columnIndex != ColumnVectorHeader::nullIndex)
1517 colData.setColumn (outputColumns.at(i).columnIndex, columnData.keysColumn(i-2).size(), columnData.keysColumn(i-2).data());
1518 }
1519 }
1520
1521 [[nodiscard]] virtual BranchPerfData getPerfData(float emptyTime) override
1522 {
1523 BranchPerfData result;
1524 result.name = branchReader.columnName();
1525 result.timeRead = benchmark.getEntryTime(emptyTime);
1526 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1527 benchmark.setSilence();
1528 benchmarkUnpack.setSilence();
1529 result.entrySize = branchReader.entrySize();
1530 result.uncompressedSize = branchReader.uncompressedSize();
1531 result.numBaskets = branchReader.numBaskets();
1532 result.entries = entries;
1533 result.nullEntries = nullEntries;
1534 return result;
1535 }
1536
// Adds the current size of columnData to `entries` and walks the elements of
// columnData to update `nullEntries`.
1537 virtual void collectColumnData() override
1538 {
1539 entries += columnData.size();
1540 for (const auto& index : columnData)
1541 {
// NOTE(review): listing line 1542 is absent from this copy; presumably it is
// a condition on `index` guarding the increment below -- confirm against the
// original file. As shown here every element would be counted.
1543 nullEntries += 1;
1544 }
1545 }
1546 };
1547
1548
1549 template<typename T>
1551 {
1556 const std::vector<ColumnarOffsetType>* offsetColumn = nullptr;
1557 std::vector<ColumnarOffsetType> offsets = {0};
1561 unsigned entries = 0;
1562 unsigned nullEntries = 0;
1563
1564 ColumnDataVectorSplitLink (const std::string& val_branchName)
1565 : branchReaderSize (val_branchName), branchReaderKey (val_branchName + ".m_persKey"), branchReaderIndex (val_branchName + ".m_persIndex"), benchmarkUnpack (branchReaderSize.columnName()+"(unpack)"), benchmark (branchReaderSize.columnName())
1566 {
1567 outputColumns.push_back ({.name = branchReaderSize.columnName()});
1568 outputColumns.push_back ({.name = branchReaderSize.containerName(), .isOffset = true, .primary = false});
1569 }
1570
1571 virtual bool connect (Backend source, std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string,ColumnInfo>& requestedColumns) override
1572 {
1573 auto iter = requestedColumns.find (outputColumns.at(0).name);
1574 if (iter == requestedColumns.end())
1575 return false;
1576 outputColumns.at(0).enabled = true;
1577 auto* tree = std::get<TTree*>(source);
1578 branchReaderSize.connectTree (tree);
1579 branchReaderKey.connectTree (tree);
1580 branchReaderIndex.connectTree (tree);
1581
1582 if (iter->second.offsetName != outputColumns.at(1).name)
1583 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1584
1585 for (auto keyColumn : columnData.connect (iter->second, offsetColumns, requestedColumns))
1586 {
1587 outputColumns.push_back ({.name = keyColumn, .primary = false, .enabled = true});
1588 requestedColumns.erase (keyColumn);
1589 }
1590 requestedColumns.erase (iter);
1591
1592 if (auto offsetIter = offsetColumns.find (outputColumns.at(1).name); offsetIter != offsetColumns.end())
1593 offsetColumn = offsetIter->second;
1594 else
1595 offsetColumns.emplace (outputColumns.at(1).name, &offsets);
1596
1597 iter = requestedColumns.find (outputColumns.at(1).name);
1598 if (iter != requestedColumns.end())
1599 {
1600 outputColumns.at(1).enabled = true;
1601 requestedColumns.erase (iter);
1602 }
1603
1604 return true;
1605 }
1606
1607 virtual void clearColumns () override
1608 {
1609 columnData.clear();
1610 offsets.clear();
1611 offsets.push_back (0);
1612 }
1613
1614 virtual void getEntry (Long64_t entry) override
1615 {
1616 benchmark.startTimer ();
1617 std::size_t branchDataSize = branchReaderSize.getEntry (entry);
1618 auto branchDataKey = branchReaderKey.getEntry (entry, branchDataSize);
1619 auto branchDataIndex = branchReaderIndex.getEntry (entry, branchDataSize);
1620 benchmark.stopTimer ();
1621 benchmarkUnpack.startTimer ();
1622 columnData.checkOffsets (offsets.size() - 1);
1623 for (std::size_t index = 0; index < branchDataSize; ++index)
1624 {
1625 if (branchDataIndex[index] == static_cast<UInt_t>(-1))
1626 columnData.addEmptyLink ();
1627 else
1628 columnData.addSplitLink (branchDataIndex[index], branchDataKey[index], offsets.size()-1);
1629 }
1630 offsets.push_back (columnData.size());
1631 if (offsetColumn)
1632 {
1633 if (offsetColumn->size() != offsets.size())
1634 throw std::runtime_error ("offset column not filled yet: " + outputColumns.at(1).name);
1635 if (offsetColumn->back() != offsets.back())
1636 throw std::runtime_error ("offset column does not match: " + outputColumns.at(1).name);
1637 }
1638 benchmarkUnpack.stopTimer ();
1639 }
1640
1641 virtual void setData (ColumnVectorData& colData) override
1642 {
1643 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1644 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1645 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1646 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1647 for (std::size_t i = 2; i < outputColumns.size(); ++ i)
1648 {
1649 if (outputColumns.at(i).columnIndex != ColumnVectorHeader::nullIndex)
1650 colData.setColumn (outputColumns.at(i).columnIndex, columnData.keysColumn(i-2).size(), columnData.keysColumn(i-2).data());
1651 }
1652 }
1653
1654 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1655 {
1656 BranchPerfData result;
1657 result.name = branchReaderSize.columnName();
1658 result.timeRead = benchmark.getEntryTime(emptyTime);
1659 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1660 benchmark.setSilence();
1661 benchmarkUnpack.setSilence();
1662 result.entrySize = branchReaderSize.entrySize().value() + branchReaderKey.entrySize().value() + branchReaderIndex.entrySize().value();
1663 result.uncompressedSize = branchReaderSize.uncompressedSize().value() + branchReaderKey.uncompressedSize().value() + branchReaderIndex.uncompressedSize().value();
1664 result.numBaskets = branchReaderSize.numBaskets().value() + branchReaderKey.numBaskets().value() + branchReaderIndex.numBaskets().value();
1665 result.entries = entries;
1666 result.nullEntries = nullEntries;
1667 return result;
1668 }
1669
// Adds the current size of columnData to `entries` and walks the elements of
// columnData to update `nullEntries`.
1670 virtual void collectColumnData () override
1671 {
1672 entries += columnData.size();
1673 for (const auto& index : columnData)
1674 {
// NOTE(review): listing line 1675 is absent from this copy; presumably it is
// a condition on `index` guarding the increment below -- confirm against the
// original file. As shown here every element would be counted.
1676 nullEntries += 1;
1677 }
1678 }
1679 };
1680
1681 template <typename T, template <typename> class Reader>
1683 {
1685 Reader<std::vector<std::vector<ElementLink<T>>>> branchReader;
1686 std::vector<ColumnarOffsetType> offsets = {0};
1687 std::vector<ColumnarOffsetType> eventOffsets = {0};
1691 unsigned entries = 0;
1692 unsigned nullEntries = 0;
1693
1694 explicit ColumnDataVectorVectorVariantLink (const std::string& val_branchName)
1695 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
1696 {
1697 outputColumns.push_back ({.name = branchReader.columnName() + ".data"});
1698 outputColumns.push_back ({.name = branchReader.columnName() + ".offset", .isOffset = true});
1699 }
1700
1701 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1702 {
1703 auto iter = requestedColumns.find (outputColumns.at(0).name);
1704 if (iter == requestedColumns.end())
1705 return false;
1706 outputColumns.at(0).enabled = true;
1707
1708 branchReader.connectTree(source);
1709
1710 if (iter->second.offsetName != outputColumns.at(1).name)
1711 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1712
1713 for (auto keyColumn : columnData.connect (iter->second, offsetColumns, requestedColumns))
1714 {
1715 outputColumns.push_back ({.name = keyColumn, .primary = false, .enabled = true});
1716 requestedColumns.erase (keyColumn);
1717 }
1718
1719 requestedColumns.erase (iter);
1720
1721 iter = requestedColumns.find (outputColumns.at(1).name);
1722 if (iter != requestedColumns.end())
1723 {
1724 outputColumns.at(1).enabled = true;
1725 requestedColumns.erase (iter);
1726 }
1727
1728 iter = requestedColumns.find (outputColumns.at(2).name);
1729 if (iter != requestedColumns.end())
1730 {
1731 outputColumns.at(2).enabled = true;
1732 requestedColumns.erase (iter);
1733 }
1734 return true;
1735 }
1736
1737 virtual void clearColumns () override
1738 {
1739 columnData.clear();
1740 offsets.clear();
1741 offsets.push_back (0);
1742 eventOffsets.clear();
1743 eventOffsets.push_back (0);
1744 }
1745
1746 virtual void getEntry (Long64_t entry) override
1747 {
1748 benchmark.startTimer ();
1749 const auto& branchData = branchReader.getEntry (entry);
1750 benchmark.stopTimer ();
1751 benchmarkUnpack.startTimer ();
1752 columnData.checkOffsets (eventOffsets.size() - 1);
1753 for (auto& data : branchData)
1754 {
1755 for (auto& element : data)
1756 {
1757 columnData.addLink (element, eventOffsets.size()-1);
1758 }
1759 offsets.push_back (columnData.size());
1760 }
1761 eventOffsets.push_back (offsets.size());
1762 benchmarkUnpack.stopTimer ();
1763 }
1764
1765 virtual void setData (ColumnVectorData& colData) override
1766 {
1767 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1768 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1769 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1770 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1771 for (std::size_t i = 2; i < outputColumns.size(); ++ i)
1772 {
1773 if (outputColumns.at(i).columnIndex != ColumnVectorHeader::nullIndex)
1774 colData.setColumn (outputColumns.at(i).columnIndex, columnData.keysColumn(i-2).size(), columnData.keysColumn(i-2).data());
1775 }
1776 }
1777
1778 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1779 {
1780 BranchPerfData result;
1781 result.name = branchReader.columnName();
1782 result.timeRead = benchmark.getEntryTime(emptyTime);
1783 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1784 benchmark.setSilence();
1785 benchmarkUnpack.setSilence();
1786 result.entrySize = branchReader.entrySize();
1787 result.uncompressedSize = branchReader.uncompressedSize();
1788 result.numBaskets = branchReader.numBaskets();
1789 result.entries = entries;
1790 result.nullEntries = nullEntries;
1791 return result;
1792 }
1793
// Adds the current size of columnData to `entries` and walks the elements of
// columnData to update `nullEntries`.
1794 virtual void collectColumnData () override
1795 {
1796 entries += columnData.size();
1797 for (const auto& index : columnData)
1798 {
// NOTE(review): listing line 1799 is absent from this copy; presumably it is
// a condition on `index` guarding the increment below -- confirm against the
// original file. As shown here every element would be counted.
1800 nullEntries += 1;
1801 }
1802 }
1803 };
1804
1805 template <template <typename> class Reader>
1807 {
1808 Reader<std::vector<std::string>> branchReader;
1809 std::vector<ColumnarOffsetType> offsets = {0};
1810 std::vector<char> columnData;
1811 std::vector<std::size_t> columnHashData;
1814
1815 ColumnDataMetNames (const std::string& val_branchName)
1816 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+"(unpack)"), benchmark (branchReader.columnName())
1817 {
1818 outputColumns.push_back ({.name = branchReader.columnName() + ".data"});
1819 outputColumns.push_back ({.name = branchReader.columnName() + ".offset", .isOffset = true});
1820 outputColumns.push_back ({.name = branchReader.columnName() + "Hash"});
1821 }
1822
1823 virtual bool connect(Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& /*offsetColumns*/, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1824 {
1825 auto iter = requestedColumns.find (outputColumns.at(0).name);
1826 if (iter == requestedColumns.end())
1827 return false;
1828 outputColumns.at(0).enabled = true;
1829
1830 branchReader.connectTree(source);
1831
1832 if (iter->second.offsetName != outputColumns.at(1).name)
1833 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
1834
1835 requestedColumns.erase (iter);
1836
1837 iter = requestedColumns.find (outputColumns.at(1).name);
1838 if (iter == requestedColumns.end())
1839 {
1840 return true;
1841 }
1842 outputColumns.at(1).enabled = true;
1843 requestedColumns.erase (iter);
1844
1845 iter = requestedColumns.find (outputColumns.at(2).name);
1846 if (iter != requestedColumns.end())
1847 {
1848 outputColumns.at(2).enabled = true;
1849 requestedColumns.erase (iter);
1850 }
1851 return true;
1852 }
1853
1854 virtual void clearColumns () override
1855 {
1856 columnData.clear();
1857 offsets.clear();
1858 offsets.push_back (0);
1859 columnHashData.clear();
1860 }
1861
1862 virtual void getEntry (Long64_t entry) override
1863 {
1864 benchmark.startTimer ();
1865 const auto& branchData = branchReader.getEntry (entry);
1866 benchmark.stopTimer ();
1867 benchmarkUnpack.startTimer ();
1868 for (auto& data : branchData)
1869 {
1870 columnData.insert (columnData.end(), data.begin(), data.end());
1871 offsets.push_back (columnData.size());
1872 columnHashData.push_back (std::hash<std::string> () (data));
1873 }
1874 benchmarkUnpack.stopTimer ();
1875 }
1876
1877 virtual void setData (ColumnVectorData& colData) override
1878 {
1879 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1880 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
1881 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1882 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
1883 if (outputColumns.at(2).columnIndex != ColumnVectorHeader::nullIndex)
1884 colData.setColumn (outputColumns.at(2).columnIndex, columnHashData.size(), columnHashData.data());
1885 }
1886
1887 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
1888 {
1889 BranchPerfData result;
1890 result.name = branchReader.columnName();
1891 result.timeRead = benchmark.getEntryTime(emptyTime);
1892 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1893 benchmark.setSilence();
1894 benchmarkUnpack.setSilence();
1895 result.entrySize = branchReader.entrySize();
1896 result.uncompressedSize = branchReader.uncompressedSize();
1897 result.numBaskets = branchReader.numBaskets();
1898 return result;
1899 }
1900
1901 virtual void collectColumnData () override
1902 {}
1903 };
1904
1905 template <template <typename> class Reader>
1907 {
// MET term names; getEntry() appends the whole list once per call.
1908 std::vector<std::string> termNames;
// NOTE(review): never assigned in the visible code, and shadowed by the
// `offsetColumns` parameter of connect() - confirm whether it is still needed.
1909 const std::vector<ColumnarOffsetType>* offsetColumns = nullptr;
// Running count of names after each getEntry() call; starts with a leading zero.
1910 std::vector<ColumnarOffsetType> offsets = {0};
// End position of each name inside namesData; starts with a leading zero.
1911 std::vector<ColumnarOffsetType> namesOffsets = {0};
// All term names concatenated character by character.
1912 std::vector<char> namesData;
// std::hash<std::string> of each appended term name.
1913 std::vector<std::size_t> namesHash;
1914
1915 ColumnDataOutputMet (const std::string& val_columnName, std::vector<std::string> val_termNames)
1916 : termNames (std::move (val_termNames))
1917 {
1918 outputColumns.push_back ({.name = val_columnName, .isOffset = true});
1919 outputColumns.push_back ({.name = val_columnName + ".name.data"});
1920 outputColumns.push_back ({.name = val_columnName + ".name.offset", .isOffset = true});
1921 outputColumns.push_back ({.name = val_columnName + ".nameHash"});
1922 }
1923
1924 virtual bool connect([[maybe_unused]]Backend source, std::unordered_map<std::string, const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
1925 {
1926 if (auto iter = requestedColumns.find (outputColumns.at(0).name);
1927 iter != requestedColumns.end())
1928 requestedColumns.erase (iter);
1929 else
1930 return false;
1931 outputColumns.at(0).enabled = true;
1932
1933 if (auto iter = requestedColumns.find (outputColumns.at(1).name);
1934 iter != requestedColumns.end())
1935 {
1936 outputColumns.at(1).enabled = true;
1937 requestedColumns.erase (iter);
1938 }
1939
1940 if (auto iter = requestedColumns.find (outputColumns.at(2).name);
1941 iter != requestedColumns.end())
1942 {
1943 outputColumns.at(2).enabled = true;
1944 requestedColumns.erase (iter);
1945 }
1946
1947 if (auto iter = requestedColumns.find (outputColumns.at(3).name);
1948 iter != requestedColumns.end())
1949 {
1950 outputColumns.at(3).enabled = true;
1951 requestedColumns.erase (iter);
1952 }
1953
1954 // For multi-tool support, skip if offset column already registered
1955 if (auto offsetIter = offsetColumns.find (outputColumns.at(0).name); offsetIter == offsetColumns.end())
1956 offsetColumns.emplace (outputColumns.at(0).name, &offsets);
1957
1958 return true;
1959 }
1960
1961 virtual void clearColumns () override
1962 {
1963 offsets.clear ();
1964 offsets.push_back (0);
1965 namesData.clear ();
1966 namesOffsets.clear ();
1967 namesOffsets.push_back (0);
1968 namesHash.clear ();
1969 }
1970
1971 virtual void getEntry (Long64_t /*entry*/) override
1972 {
1973 for (const auto& termName : termNames)
1974 {
1975 namesData.insert (namesData.end(), termName.begin(), termName.end());
1976 namesOffsets.push_back (namesData.size());
1977 namesHash.push_back (std::hash<std::string> () (termName));
1978 }
1979 offsets.push_back (namesHash.size());
1980 }
1981
1982 virtual void setData (ColumnVectorData& colData) override
1983 {
1984 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
1985 colData.setColumn (outputColumns.at(0).columnIndex, offsets.size(), offsets.data());
1986 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
1987 colData.setColumn (outputColumns.at(1).columnIndex, namesData.size(), namesData.data());
1988 if (outputColumns.at(2).columnIndex != ColumnVectorHeader::nullIndex)
1989 colData.setColumn (outputColumns.at(2).columnIndex, namesOffsets.size(), namesOffsets.data());
1990 if (outputColumns.at(3).columnIndex != ColumnVectorHeader::nullIndex)
1991 colData.setColumn (outputColumns.at(3).columnIndex, namesHash.size(), namesHash.data());
1992 }
1993
1994 [[nodiscard]] virtual BranchPerfData getPerfData (float /*emptyTime*/) override
1995 {
1996 BranchPerfData result;
1997 result.name = outputColumns.at(0).name + "(met-out)";
1998 return result;
1999 }
2000
2001 virtual void collectColumnData () override
2002 {}
2003 };
2004
2005 template <template <typename> class Reader>
2007 {
2008 Reader<xAOD::CaloClusterContainer> branchReader;
2009 std::vector<ColumnarOffsetType> offsets = {0};
2010 std::vector<std::uint32_t> columnData;
2013 unsigned entries = 0;
2014
2015 ColumnDataSamplingPattern (const std::string& val_branchName)
2016 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+".samplingPattern(fallback)(unpack)"), benchmark (branchReader.columnName() + ".samplingPattern(fallback)")
2017 {
2018 outputColumns.push_back ({.name = branchReader.columnName() + ".samplingPattern"});
2019 outputColumns.push_back ({.name = branchReader.columnName(), .isOffset = true, .primary = false});
2020 }
2021
2022 virtual bool connect(Backend source, std::unordered_map<std::string,const std::vector<ColumnarOffsetType>*>& /*offsetColumns*/, std::unordered_map<std::string, ColumnInfo>& requestedColumns) override
2023 {
2024 auto iter = requestedColumns.find (outputColumns.at(0).name);
2025 if (iter == requestedColumns.end())
2026 return false;
2027 outputColumns.at(0).enabled = true;
2028
2029 branchReader.connectTree(source);
2030 if (iter->second.offsetName != outputColumns.at(1).name)
2031 throw std::runtime_error ("offset name mismatch: " + iter->second.offsetName + " != " + outputColumns.at(1).name);
2032
2033 requestedColumns.erase (iter);
2034
2035 iter = requestedColumns.find (outputColumns.at(1).name);
2036 if (iter == requestedColumns.end())
2037 {
2038 return true;
2039 }
2040 outputColumns.at(1).enabled = true;
2041 requestedColumns.erase (iter);
2042 return true;
2043 }
2044
2045 virtual void clearColumns () override
2046 {
2047 columnData.clear();
2048 offsets.clear();
2049 offsets.push_back (0);
2050 }
2051
2052 virtual void getEntry (Long64_t entry) override
2053 {
2054 benchmark.startTimer ();
2055 const auto& branchData = branchReader.getEntry (entry);
2056 benchmark.stopTimer ();
2057 benchmarkUnpack.startTimer ();
2058 for (auto data : branchData)
2059 {
2060 columnData.push_back (data->samplingPattern());
2061 }
2062 offsets.push_back (columnData.size());
2063 benchmarkUnpack.stopTimer ();
2064 }
2065
2066 virtual void setData (ColumnVectorData& colData) override
2067 {
2068 if (outputColumns.at(0).columnIndex != ColumnVectorHeader::nullIndex)
2069 colData.setColumn (outputColumns.at(0).columnIndex, columnData.size(), columnData.data());
2070 if (outputColumns.at(1).columnIndex != ColumnVectorHeader::nullIndex)
2071 colData.setColumn (outputColumns.at(1).columnIndex, offsets.size(), offsets.data());
2072 }
2073
2074 [[nodiscard]] virtual BranchPerfData getPerfData (float emptyTime) override
2075 {
2076 BranchPerfData result;
2077 result.name = branchReader.columnName() + "(fallback)";
2078 result.timeRead = benchmark.getEntryTime(emptyTime);
2079 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
2080 benchmark.setSilence();
2081 benchmarkUnpack.setSilence();
2082 result.entrySize = branchReader.entrySize();
2083 result.uncompressedSize = branchReader.uncompressedSize();
2084 result.numBaskets = branchReader.numBaskets();
2085 result.entries = entries;
2086 return result;
2087 }
2088
2089 virtual void collectColumnData () override
2090 {
2091 entries += columnData.size();
2092 }
2093 };
2094
2095 namespace
2096 {
// Bundles one tool under test with its column-map wrapper, its repeat-call
// flags and the two benchmarks that time its calls.
2098 struct ToolData
2099 {
2100 std::string name;
2101 ColumnarTool<ColumnarModeArray>* tool = nullptr;
2102 std::unique_ptr<ToolColumnVectorMap> toolWrapper;
2103 bool noRepeatCall = false;
2104 bool runToolTwice = false;
2105
// Time the first call and the optional repeated call, respectively.
2106 Benchmark benchmarkCall;
2107 Benchmark benchmarkCall2;
2108
// Copies name/flags from the test definition and user configuration.
// Throws std::runtime_error if td.tool is not a ColumnarTool<ColumnarModeArray>.
2109 ToolData (const UserConfiguration& config, const TestDefinition& td,
2110 ColumnVectorHeader& columnHeader)
2111 : name (td.name)
2112 , noRepeatCall (td.noRepeatCall)
2113 , runToolTwice (config.runToolTwice)
2114 , benchmarkCall ("", config.batchSize)
2115 , benchmarkCall2 ("", config.batchSize)
2116 {
2117 tool = dynamic_cast<ColumnarTool<ColumnarModeArray>*>(td.tool);
2118 if (!tool)
2119 throw std::runtime_error ("tool is not a ColumnarTool<ColumnarModeArray>: " + td.name);
// NOTE(review): the listing skips a line right after this `if`, so as shown
// the wrapper below is only created when container renames are present.
// Confirm the guarded statement against the full source.
2120 if (!td.containerRenames.empty())
2122 toolWrapper = std::make_unique<ToolColumnVectorMap> (columnHeader, *tool);
2123 }
2124
// Calls the tool on columnData, timed by benchmarkCall; when runToolTwice is
// set and noRepeatCall is not, calls it a second time under benchmarkCall2.
2126 void call (ColumnVectorData& columnData)
2127 {
2128 benchmarkCall.startTimer ();
2129 columnData.callNoCheck (*tool);
2130 benchmarkCall.stopTimer ();
2131 if (runToolTwice && !noRepeatCall)
2132 {
2133 benchmarkCall2.startTimer ();
2134 columnData.callNoCheck (*tool);
2135 benchmarkCall2.stopTimer ();
2136 }
2137 }
2138 };
2139 }
2140 }
2141
2142
2143
2144
2145 ColumnarPhysLiteTest ::
2146 ColumnarPhysLiteTest ()
2147 {
2148 static std::once_flag flag;
2149 std::call_once (flag, [] ()
2150 {
2151#ifdef XAOD_STANDALONE
2152 xAOD::Init().ignore();
2153#else
2154 POOL::Init();
2155#endif
2156 });
2157
2158 auto userConfiguration = TestUtils::UserConfiguration::fromEnvironment();
2159 if (userConfiguration.isrntuple)
2160 {
2161 auto* fileName = getenv("ASG_TEST_FILE_RNTUPLE_LITE_MC");
2162 if (fileName == nullptr)
2163 throw std::runtime_error("missing ASG_TEST_FILE_RNTUPLE_LITE_MC");
2164 rntreader = ROOT::RNTupleReader::Open("EventData", fileName);
2165 inspector = ROOT::Experimental::RNTupleInspector::Create("EventData", fileName);
2167 if (!rntreader or !inspector)
2168 throw std::runtime_error("failed to open rntuple");
2169 } else
2170 {
2171 auto* fileName = getenv("ASG_TEST_FILE_LITE_MC");
2172 if (fileName == nullptr)
2173 throw std::runtime_error("missing ASG_TEST_FILE_LITE_MC");
2174 file.reset(TFile::Open(fileName, "READ"));
2175 if (!file)
2176 throw std::runtime_error("failed to open file");
2177 tree = dynamic_cast<TTree*>(file->Get("CollectionTree"));
2178 if (!tree)
2179 throw std::runtime_error("failed to open rntuple");
2180 }
2181 }
2182
2183 ColumnarPhysLiteTest ::~ColumnarPhysLiteTest()
2184 {
2185 if (rntbackend)
2186 delete rntbackend;
2187 }
2188
2189 std::string ColumnarPhysLiteTest :: makeUniqueName ()
2190 {
2191 static std::atomic<unsigned> index = 0;
2192 return "UniquePhysliteTestTool" + std::to_string(++index);
2193 }
2194
2195 bool ColumnarPhysLiteTest ::
2196 checkMode ()
2197 {
2198 return true;
2199 }
2200
// Fill `knownColumns` with every column provider this test can offer:
// the event count, one reader per supported "Aux"/"AuxDyn" branch (TTree) or
// field (RNTuple), the samplingPattern fallback, the explicitly declared
// element-link columns, and the extra output columns used for METMaker.
// `testDefinitions` is only used to collect the MET term names.
2201 void ColumnarPhysLiteTest :: setupKnownColumns (std::span<const TestDefinition> testDefinitions)
2202 {
2203 using namespace TestUtils;
2204
2205 knownColumns.push_back (std::make_shared<ColumnDataEventCount> ());
2206
// TTree input: index the top-level branches and their direct sub-branches by name.
2207 if (tree)
2208 {
2209 tree->SetMakeClass(1);
2210 {
2211 std::unordered_map<std::string, TBranch*> branches;
2212 {
2213 TIter branchIter(tree->GetListOfBranches());
2214 TObject* obj = nullptr;
2215 while ((obj = branchIter()))
2216 {
2217 TBranch* branch = nullptr;
2218 if ((branch = dynamic_cast<TBranch*>(obj)))
2219 {
2220 branches.emplace(branch->GetName(), branch);
2221 TIter subBranchIter(branch->GetListOfBranches());
2222 while ((obj = subBranchIter()))
2223 {
2224 if (auto subBranch = dynamic_cast<TBranch*>(obj))
2225 branches.emplace(subBranch->GetName(), subBranch);
2226 }
2227 }
2228 }
2229 }
2230
// Pick a reader from the branch's expected type; branches with a type not
// listed below are silently skipped.
2231 for (const auto& [name, branch] : branches)
2232 {
2233 if (name.find("AuxDyn.") != std::string::npos ||
2234 name.find("Aux.") != std::string::npos)
2235 {
2236 TClass* branchClass = nullptr;
2237 EDataType branchType{};
2238 branch->GetExpectedType(branchClass, branchType);
// No class reported: dispatch on the fundamental data type instead.
2239 if (branchClass == nullptr)
2240 {
2241 switch (branchType)
2242 {
2243 case kInt_t:
2244 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::int32_t, BranchReader>>(branch->GetName()));
2245 break;
2246 case kUInt_t:
2247 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::uint32_t, BranchReader>>(branch->GetName()));
2248 break;
2249 case kULong_t:
2250 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::uint64_t, BranchReader>>(branch->GetName()));
2251 break;
2252 case kULong64_t:
2253 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::uint64_t, BranchReader>>(branch->GetName()));
2254 break;
2255 case kFloat_t:
2256 knownColumns.push_back(std::make_shared<ColumnDataScalar<float, BranchReader>>(branch->GetName()));
2257 break;
2258 default:
2259 // no-op
2260 break;
2261 }
2262 } else
2263 {
2264 if (*branchClass->GetTypeInfo() == typeid(std::vector<float>))
2265 {
2266 knownColumns.push_back (std::make_shared<ColumnDataVector<float,BranchReader>> (branch->GetName()));
2267 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<char>))
2268 {
2269 knownColumns.push_back (std::make_shared<ColumnDataVector<char,BranchReader>> (branch->GetName()));
2270 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::int8_t>))
2271 {
2272 knownColumns.push_back (std::make_shared<ColumnDataVector<std::int8_t,BranchReader>> (branch->GetName()));
2273 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::uint8_t>))
2274 {
2275 knownColumns.push_back (std::make_shared<ColumnDataVector<std::uint8_t,BranchReader>> (branch->GetName()));
2276 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::int16_t>))
2277 {
2278 knownColumns.push_back (std::make_shared<ColumnDataVector<std::int16_t,BranchReader>> (branch->GetName()));
2279 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::uint16_t>))
2280 {
2281 knownColumns.push_back (std::make_shared<ColumnDataVector<std::uint16_t,BranchReader>> (branch->GetName()));
2282 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::int32_t>))
2283 {
2284 knownColumns.push_back (std::make_shared<ColumnDataVector<std::int32_t,BranchReader>> (branch->GetName()));
2285 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::uint32_t>))
2286 {
2287 knownColumns.push_back (std::make_shared<ColumnDataVector<std::uint32_t,BranchReader>> (branch->GetName()));
2288 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::int64_t>))
2289 {
2290 knownColumns.push_back (std::make_shared<ColumnDataVector<std::int64_t,BranchReader>> (branch->GetName()));
2291 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::uint64_t>))
2292 {
2293 knownColumns.push_back (std::make_shared<ColumnDataVector<std::uint64_t,BranchReader>> (branch->GetName()));
2294 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::vector<float>>))
2295 {
2296 knownColumns.push_back (std::make_shared<ColumnDataVectorVector<float,BranchReader>> (branch->GetName()));
2297 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::vector<std::int32_t>>))
2298 {
2299 knownColumns.push_back (std::make_shared<ColumnDataVectorVector<std::int32_t,BranchReader>> (branch->GetName()));
2300 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::vector<std::uint64_t>>))
2301 {
2302 knownColumns.push_back (std::make_shared<ColumnDataVectorVector<std::uint64_t,BranchReader>> (branch->GetName()));
2303 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::vector<std::vector<std::size_t>>>))
2304 {
2305 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorVector<std::size_t,BranchReader>> (branch->GetName()));
2306 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::vector<std::vector<unsigned char>>>))
2307 {
2308 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorVector<unsigned char,BranchReader>> (branch->GetName()));
2309 } else if (*branchClass->GetTypeInfo() == typeid(std::vector<std::string>))
2310 {
2311 knownColumns.push_back (std::make_shared<ColumnDataMetNames<BranchReader>> (branch->GetName()));
2312 }
2313 }
2314 }
2315 }
2316 }
2317 // This is a fallback for the case that we don't have an explicit
2318 // `samplingPattern` branch in our input file (i.e. an older file),
2319 // to allow us to still test tools needing it. This is likely not
2320 // something that actual users can do (they need the new files), but
2321 // for testing it seems like a reasonable workaround.
2322 knownColumns.push_back(std::make_shared<ColumnDataSamplingPattern<BranchReader>>("egammaClusters"));
2323
2324 // For branches that are element links they need to be explicitly
2325 // declared to have the correct xAOD type, correct split setting,
2326 // and correct linked containers.
2327
2328 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::CaloClusterContainer,BranchReader>>("AnalysisElectronsAuxDyn.caloClusterLinks"));
2329 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::TrackParticleContainer, BranchReader>>("AnalysisElectronsAuxDyn.trackParticleLinks"));
2330 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::CaloClusterContainer,BranchReader>>("AnalysisPhotonsAuxDyn.caloClusterLinks"));
2331 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::VertexContainer, BranchReader>>("AnalysisPhotonsAuxDyn.vertexLinks"));
2332 knownColumns.push_back(std::make_shared<ColumnDataVectorSplitLink<xAOD::TrackParticleContainer>>("AnalysisMuonsAuxDyn.inDetTrackParticleLink"));
2333 knownColumns.push_back(std::make_shared<ColumnDataVectorSplitLink<xAOD::TrackParticleContainer>>("AnalysisMuonsAuxDyn.combinedTrackParticleLink"));
2334 knownColumns.push_back(std::make_shared<ColumnDataVectorSplitLink<xAOD::TrackParticleContainer>>("AnalysisMuonsAuxDyn.extrapolatedMuonSpectrometerTrackParticleLink"));
2335 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::TrackParticleContainer, BranchReader>>("GSFConversionVerticesAuxDyn.trackParticleLinks"));
2336 knownColumns.push_back(std::make_shared<ColumnDataVectorSplitLink<xAOD::TrackParticleContainer>>("GSFTrackParticlesAuxDyn.originalTrackParticle"));
2337 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorVariantLink<xAOD::IParticleContainer, BranchReader>>("AnalysisJetsAuxDyn.GhostTrack"));
2338 knownColumns.push_back(std::make_shared<ColumnDataVectorLink<xAOD::JetContainer, BranchReader>>("METAssoc_AnalysisMETAux.jetLink"));
2339 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorVariantLink<xAOD::IParticleContainer, BranchReader>>("METAssoc_AnalysisMETAux.objectLinks"));
2340
// RNTuple input: collect every field, at any nesting depth, keyed by its
// qualified field name.
2341 }else if (rntbackend)
2342 {
2343 std::unordered_map<std::string, ROOT::DescriptorId_t> fields;
2344 {
2345 const auto& desc = rntreader->GetDescriptor();
2346
2347 for (const auto& field : desc.GetTopLevelFields())
2348 {
// NOTE(review): `fieldName` is not used in the visible code.
2349 auto fieldName = field.GetFieldName();
2350 fields.emplace(desc.GetQualifiedFieldName(field.GetId()), field.GetId());
2351
// Stack of field ids whose sub-fields still have to be visited.
2352 std::vector<ROOT::DescriptorId_t> subFieldIds{field.GetId()};
2353 while (!subFieldIds.empty())
2354 {
2355 const auto parentId = subFieldIds.back();
// NOTE(review): `parentname` is not used in the visible code.
2356 auto parentname=desc.GetQualifiedFieldName(parentId);
2357 subFieldIds.pop_back();
2358
2359 for (const auto& subField : desc.GetFieldIterable(parentId))
2360 {
// NOTE(review): `subFieldName` is not used; the same name is computed again
// in the emplace below.
2361 auto subFieldName = desc.GetQualifiedFieldName(subField.GetId());
2362
2363 fields.emplace(desc.GetQualifiedFieldName(subField.GetId()), subField.GetId());
2364
2365 subFieldIds.push_back(subField.GetId());
2366 }
2367 }
2368 }
2369 }
2370
// Pick a reader from the field's type name; fields with a type name not
// listed below are silently skipped.
2371 const auto& desc = rntreader->GetDescriptor();
2372 for (const auto& [name, fieldId] : fields)
2373 {
// NOTE(review): `fieldName` is not used in the visible code.
2374 auto fieldName = desc.GetQualifiedFieldName(fieldId);
2375
2376 if (name.find("AuxDyn:") != std::string::npos ||
2377 name.find("Aux:") != std::string::npos)
2378 {
2379
2380 const auto& fieldDesc = desc.GetFieldDescriptor(fieldId);
2381 const std::string typeName = desc.GetTypeNameForComparison(fieldDesc);
2382 if (typeName == "std::int32_t" || typeName == "int")
2383 {
2384 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::int32_t, RNTFieldReader>>(name));
2385 } else if (typeName == "std::uint32_t" || typeName == "unsigned int")
2386 {
2387 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::uint32_t, RNTFieldReader>>(name));
2388 } else if (typeName == "std::uint64_t" || typeName == "unsigned long" || typeName == "unsigned long long")
2389 {
2390 knownColumns.push_back(std::make_shared<ColumnDataScalar<std::uint64_t, RNTFieldReader>>(name));
2391 } else if (typeName == "float")
2392 {
2393 knownColumns.push_back(std::make_shared<ColumnDataScalar<float, RNTFieldReader>>(name));
2394 } else if (typeName == "std::vector<float>")
2395 {
2396 knownColumns.push_back(std::make_shared<ColumnDataVector<float, RNTFieldReader>>(name));
2397 } else if (typeName == "std::vector<char>")
2398 {
2399 knownColumns.push_back(std::make_shared<ColumnDataVector<char, RNTFieldReader>>(name));
2400 } else if (typeName == "std::vector<std::int8_t>")
2401 {
2402 knownColumns.push_back(std::make_shared<ColumnDataVector<std::int8_t, RNTFieldReader>>(name));
2403 } else if (typeName == "std::vector<std::uint8_t>")
2404 {
2405 knownColumns.push_back(std::make_shared<ColumnDataVector<std::uint8_t, RNTFieldReader>>(name));
2406 } else if (typeName == "std::vector<std::int16_t>")
2407 {
2408 knownColumns.push_back(std::make_shared<ColumnDataVector<std::int16_t, RNTFieldReader>>(name));
2409 } else if (typeName == "std::vector<std::uint16_t>")
2410 {
2411 knownColumns.push_back(std::make_shared<ColumnDataVector<std::uint16_t, RNTFieldReader>>(name));
2412 } else if (typeName == "std::vector<std::int32_t>")
2413 {
2414 knownColumns.push_back(std::make_shared<ColumnDataVector<std::int32_t, RNTFieldReader>>(name));
2415 } else if (typeName == "std::vector<std::uint32_t>")
2416 {
2417 knownColumns.push_back(std::make_shared<ColumnDataVector<std::uint32_t, RNTFieldReader>>(name));
2418 } else if (typeName == "std::vector<std::int64_t>")
2419 {
2420 knownColumns.push_back(std::make_shared<ColumnDataVector<std::int64_t, RNTFieldReader>>(name));
2421 } else if (typeName == "std::vector<std::uint64_t>")
2422 {
2423 knownColumns.push_back(std::make_shared<ColumnDataVector<std::uint64_t, RNTFieldReader>>(name));
2424 } else if (typeName == "std::vector<std::vector<float>>")
2425 {
2426 knownColumns.push_back(std::make_shared<ColumnDataVectorVector<float, RNTFieldReader>>(name));
2427 } else if (typeName == "std::vector<std::vector<std::int32_t>>")
2428 {
2429 knownColumns.push_back(std::make_shared<ColumnDataVectorVector<std::int32_t, RNTFieldReader>>(name));
// NOTE(review): the bodies of the next five branches are not visible in this
// listing (one line is skipped inside each pair of braces). Confirm against
// the full source which column type each of them registers.
2430 } else if (typeName == "std::vector<std::vector<std::uint64_t>>")
2431 {
2433 } else if (typeName =="std::vector<std::vector<std::vector<std::size_t>>>")
2434 {
2436 }else if (typeName =="std::vector<std::vector<std::vector<std::uint64_t>>>")
2437 {
2439 }else if (typeName =="std::vector<std::vector<std::vector<std::uint8_t>>>")
2440 {
2442 } else if (typeName =="std::vector<std::vector<std::vector<unsigned char>>>")
2443 {
2445 } else if (typeName == "std::vector<std::string>")
2446 {
2447 knownColumns.push_back(std::make_shared<ColumnDataMetNames<RNTFieldReader>>(name));
2448 }
2449 }
2450 }
// Same samplingPattern fallback and explicit element-link declarations as on
// the TTree path, using the RNTuple reader and ':'-style field names.
2451 knownColumns.push_back(std::make_shared<ColumnDataSamplingPattern<RNTFieldReader>>("egammaClusters"));
2452
2453 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::CaloClusterContainer,RNTFieldReader>>("AnalysisElectronsAuxDyn:caloClusterLinks"));
2454 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::TrackParticleContainer, RNTFieldReader>>("AnalysisElectronsAuxDyn:trackParticleLinks"));
2455 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::CaloClusterContainer,RNTFieldReader>>("AnalysisPhotonsAuxDyn:caloClusterLinks"));
2456 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::VertexContainer, RNTFieldReader>>("AnalysisPhotonsAuxDyn:vertexLinks"));
2457 knownColumns.push_back(std::make_shared<ColumnDataVectorRLink<xAOD::TrackParticleContainer,RNTFieldReader>>("AnalysisMuonsAuxDyn:inDetTrackParticleLink"));
2458 knownColumns.push_back(std::make_shared<ColumnDataVectorRLink<xAOD::TrackParticleContainer,RNTFieldReader>>("AnalysisMuonsAuxDyn:combinedTrackParticleLink"));
2459 knownColumns.push_back(std::make_shared<ColumnDataVectorRLink< xAOD::TrackParticleContainer, RNTFieldReader>>("AnalysisMuonsAuxDyn:extrapolatedMuonSpectrometerTrackParticleLink"));
2460 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorLink<xAOD::TrackParticleContainer, RNTFieldReader>>("GSFConversionVerticesAuxDyn:trackParticleLinks"));
2461 knownColumns.push_back(std::make_shared<ColumnDataVectorRLink<xAOD::TrackParticleContainer,RNTFieldReader>>("GSFTrackParticlesAuxDyn:originalTrackParticle"));
2462 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorVariantLink<xAOD::IParticleContainer, RNTFieldReader>>("AnalysisJetsAuxDyn:GhostTrack"));
2463 knownColumns.push_back(std::make_shared<ColumnDataVectorLink<xAOD::JetContainer, RNTFieldReader>>("METAssoc_AnalysisMETAux:.jetLink"));
2464 knownColumns.push_back(std::make_shared<ColumnDataVectorVectorVariantLink<xAOD::IParticleContainer, RNTFieldReader>>("METAssoc_AnalysisMETAux:.objectLinks"));
2465
2466 }
2467
2468
2469 // For METMaker we need to preplace all of the MET terms that we
2470 // expect to be used, that's what this line does.
// The names are collected without duplicates, in first-seen order.
2471 std::vector<std::string> allMetTermNames;
2472 for (const auto& td : testDefinitions)
2473 {
2474 for (const auto& name : td.metTermNames)
2475 {
2476 if (std::find (allMetTermNames.begin(), allMetTermNames.end(), name) == allMetTermNames.end())
2477 allMetTermNames.push_back (name);
2478 }
2479 }
2480
2481
2482 if (tree)
2483 {
2484 if (!allMetTermNames.empty())
2485 knownColumns.push_back(std::make_shared<ColumnDataOutputMet<BranchReader>>("OutputMET",allMetTermNames));
2486
2487 // For METMaker we need various extra columns to run. This may need
2488 // some work to avoid, but would likely be worth it.
2489 knownColumns.push_back(std::make_shared<ColumnDataOutVector<std::uint16_t, BranchReader>>("AnalysisMuons.objectType", xAOD::Type::Muon));
2490 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, BranchReader>>("AnalysisMuons.m", ParticleConstants::muonMassInMeV));
2491 knownColumns.push_back(std::make_shared<ColumnDataOutVector<std::uint16_t, BranchReader>>("AnalysisJets.objectType", xAOD::Type::Jet));
2492
2493 // These are columns that represent variables that are normally held
2494 // by METAssociationHelper, or alternatively are decorated on the
2495 // MET terms (even though they are per object).
2496 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, BranchReader>>("AnalysisMuons.MetObjectWeight", 0));
2497 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, BranchReader>>("AnalysisJets.MetObjectWeight", 0));
2498 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, BranchReader>>("AnalysisJets.MetObjectWeightSoft", 0));
2499 knownColumns.push_back(std::make_shared<ColumnDataOutVector<MissingETBase::Types::bitmask_t,BranchReader>>("METAssoc_AnalysisMET.useObjectFlags", 0));
2500 } else if (rntbackend)
2501 {
// NOTE(review): this is the RNTuple path but the column is instantiated with
// BranchReader, unlike every other column in this branch - confirm whether
// RNTFieldReader was intended.
2502 if (!allMetTermNames.empty())
2503 knownColumns.push_back(std::make_shared<ColumnDataOutputMet<BranchReader>>("OutputMET",allMetTermNames));
2504
2505 // For METMaker we need various extra columns to run. This may need
2506 // some work to avoid, but would likely be worth it.
2507 knownColumns.push_back(std::make_shared<ColumnDataOutVector<std::uint16_t, RNTFieldReader>>("AnalysisMuons.objectType", xAOD::Type::Muon));
2508 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, RNTFieldReader>>("AnalysisMuons.m", ParticleConstants::muonMassInMeV));
2509 knownColumns.push_back(std::make_shared<ColumnDataOutVector<std::uint16_t, RNTFieldReader>>("AnalysisJets.objectType", xAOD::Type::Jet));
2510
2511 // These are columns that represent variables that are normally held
2512 // by METAssociationHelper, or alternatively are decorated on the
2513 // MET terms (even though they are per object).
2514 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, RNTFieldReader>>("AnalysisMuons.MetObjectWeight", 0));
2515 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, RNTFieldReader>>("AnalysisJets.MetObjectWeight", 0));
2516 knownColumns.push_back(std::make_shared<ColumnDataOutVector<float, RNTFieldReader>>("AnalysisJets.MetObjectWeightSoft", 0));
2517 knownColumns.push_back(std::make_shared<ColumnDataOutVector<MissingETBase::Types::bitmask_t,RNTFieldReader>>("METAssoc_AnalysisMET.useObjectFlags", 0));
2518 }
2519 } // end of setupKnownColumns
2520
2521 void ColumnarPhysLiteTest :: setupColumns (const ColumnVectorHeader& columnHeader)
2522 {
2523 using namespace asg::msgUserCode;
2524
2525 // Get all column info directly from the header (all tools have already
2526 // registered their columns via ToolColumnVectorMap)
2527 auto requestedColumns = columnHeader.getAllColumnInfo();
2528
2529 // Print requested columns
2530 for (auto& [name, info] : requestedColumns)
2531 std::cout << "requested columns: " << name << std::endl;
2532
2533 for (auto& column : knownColumns)
2534 {
2535 if (tree)
2536 {
2537 if (column->connect (tree, offsetColumns, requestedColumns))
2538 usedColumns.push_back (column);
2539 } else if (rntbackend)
2540 {
2541 if (column->connect(rntbackend, offsetColumns, requestedColumns))
2542 usedColumns.push_back(column);
2543 }
2544 }
2545
2546 std::set<std::string> unclaimedColumns;
2547 for (auto& column : requestedColumns)
2548 {
2549 if (!column.second.isOptional)
2550 unclaimedColumns.insert (column.first);
2551 else
2552 std::cout << "optional column not claimed: " << column.first << std::endl;
2553 }
2554 std::erase_if (unclaimedColumns, [&] (auto& columnName)
2555 {
2556 const auto& info = requestedColumns.at (columnName);
2557 if (info.accessMode != ColumnAccessMode::output || !info.fixedDimensions.empty())
2558 return false;
2559 auto offsetIter = std::find_if (usedColumns.begin(), usedColumns.end(), [&] (const std::shared_ptr<TestUtils::IColumnData>& column)
2560 {
2561 for (auto& output : column->outputColumns)
2562 {
2563 if (output.name == info.offsetName)
2564 return true;
2565 }
2566 return false;
2567 });
2568 if (offsetIter == usedColumns.end())
2569 return false;
2570 std::shared_ptr<TestUtils::IColumnData> myColumn;
2571 if (tree)
2572 {
2573 if (*info.type == typeid(float))
2574 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<float, BranchReader>>(info.name, 0);
2575 else if (*info.type == typeid(char))
2576 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<char, BranchReader>>(info.name, 0);
2577 else if (*info.type == typeid(std::uint16_t))
2578 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<std::uint16_t, BranchReader>>(info.name, 0);
2579 else if (*info.type == typeid(std::uint64_t))
2580 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<std::uint64_t, BranchReader>>(info.name, 0);
2581 else
2582 {
2583 ANA_MSG_WARNING("unhandled column type: " << info.name << " "<< info.type->name());
2584 return false;
2585 }
2586 } else if (rntbackend)
2587 {
2588 if (*info.type == typeid(float))
2589 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<float, RNTFieldReader>>(info.name,0);
2590 else if (*info.type == typeid(char))
2591 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<char, RNTFieldReader>>(info.name, 0);
2592 else if (*info.type == typeid(std::uint16_t))
2593 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<std::uint16_t, RNTFieldReader>>(info.name, 0);
2594 else if (*info.type == typeid(std::uint64_t))
2595 myColumn = std::make_shared<TestUtils::ColumnDataOutVector<std::uint64_t, RNTFieldReader>>(info.name, 0);
2596 else
2597 {
2598 ANA_MSG_WARNING("unhandled column type: " << info.name << " " << info.type->name());
2599 return false;
2600 }
2601 }
2602 knownColumns.push_back(myColumn);
2603 if (tree) {
2604 if (!myColumn->connect(tree, offsetColumns, requestedColumns))
2605 {
2606 ANA_MSG_WARNING("failed to connect dynamic output column: " << info.name);
2607 return false;
2608 }
2609 } else if (rntbackend)
2610 {
2611 if (!myColumn->connect(rntbackend, offsetColumns, requestedColumns))
2612 {
2613 ANA_MSG_WARNING("failed to connect dynamic output column: " << info.name);
2614 return false;
2615 }
2616 }
2617 usedColumns.push_back(myColumn);
2618 return true;
2619 });
2620 if (!unclaimedColumns.empty())
2621 {
2622 std::string message = "columns not claimed:";
2623 for (auto& column : unclaimedColumns)
2624 message += " " + column;
2625 throw std::runtime_error(message);
2626 }
2627 }
2628
2629 void ColumnarPhysLiteTest :: doCall (const TestDefinition& testDefinition)
2630 {
2631 doCallMulti ({testDefinition});
2632 }
2633
2634
2635void ColumnarPhysLiteTest ::doCallMulti(
2636 const std::vector<TestDefinition>& testDefinitions) {
2637 using namespace asg::msgUserCode;
2638 auto userConfiguration = TestUtils::UserConfiguration::fromEnvironment();
2639
2640 // apply systematics for all test definitions
2641 for (const auto& td : testDefinitions) {
2642 if (!td.sysName.empty()) {
2643 auto* sysTool = dynamic_cast<CP::ISystematicsTool*>(td.tool);
2644 if (!sysTool)
2645 throw std::runtime_error("tool does not support systematics");
2646 std::cout << "applying systematic variation: " << td.sysName << std::endl;
2647 if (sysTool->applySystematicVariation(CP::SystematicSet(td.sysName))
2648 .isFailure())
2649 throw std::runtime_error("failed to apply systematic variation: " +
2650 td.sysName);
2651 }
2652 }
2653
2654 if constexpr (columnarAccessMode == 2) {
2655 // Create shared column header for all tools
2656 ColumnVectorHeader columnHeader;
2657
2658 // Build vector of ToolData from all testDefinitions
2659 std::vector<TestUtils::ToolData> toolDataVec;
2660 for (const auto& td : testDefinitions)
2661 toolDataVec.emplace_back(userConfiguration, td, columnHeader);
2662
2663 setupKnownColumns(testDefinitions);
2664 // Set up columns using the shared header (all tools have already
2665 // registered their columns via ToolColumnVectorMap, so we get all columns
2666 // from the header)
2667 setupColumns(columnHeader);
2668
2669 // connect column indices from header to each column for direct setting
2670 for (auto& column : usedColumns)
2671 column->connectColumnIndices(columnHeader);
2672
2673 Benchmark benchmarkEmpty("empty");
2674 Benchmark benchmarkCheck("", userConfiguration.batchSize);
2675 auto numberOfEvents = 0;
2676 if (tree) {
2677 numberOfEvents = tree->GetEntries();
2678 } else if (rntbackend) {
2679 numberOfEvents = rntreader->GetNEntries();
2680 }
2681 Long64_t entry = 0;
2682 const auto startTime = std::chrono::high_resolution_clock::now();
2683 bool endLoop = false;
2684 for (; !endLoop; ++entry) {
2685 // just sample how much overhead there is for starting and
2686 // stopping the timer
2687 benchmarkEmpty.startTimer();
2688 benchmarkEmpty.stopTimer();
2689 ColumnVectorData columnData(&columnHeader);
2690 for (auto& column : usedColumns)
2691 column->getEntry(entry % numberOfEvents);
2692 if ((entry + 1) % userConfiguration.batchSize == 0) {
2693 if (entry < numberOfEvents) {
2694 for (auto& column : usedColumns)
2695 column->collectColumnData();
2696 }
2697 for (auto& column : usedColumns)
2698 column->setData(columnData);
2699
2700 // Check data once (shared column data)
2701 benchmarkCheck.startTimer();
2702 columnData.checkData();
2703 benchmarkCheck.stopTimer();
2704 // Call each tool
2705 for (auto& toolData : toolDataVec) {
2706 toolData.call(columnData);
2707 }
2708 for (auto& column : usedColumns)
2709 column->clearColumns();
2710 if ((std::chrono::high_resolution_clock::now() - startTime) >
2711 userConfiguration.targetTime)
2712 endLoop = true;
2713 } else if (entry + 1 == numberOfEvents) {
2714 for (auto& column : usedColumns)
2715 column->collectColumnData();
2716 }
2717 }
2718 std::cout << "Entries in file: " << numberOfEvents << std::endl;
2719 std::cout << "Total entries read: " << entry << std::endl;
2720 const float emptyTime = benchmarkEmpty.getEntryTime(0).value();
2721 std::cout << "Empty benchmark time: " << emptyTime << "ns (tick=" << Benchmark::getTickDuration() << "ns)" << std::endl;
2722 benchmarkEmpty.setSilence();
2723 const auto checkTime = benchmarkCheck.getEntryTime(emptyTime);
2724 if (checkTime)
2725 std::cout << "Check data time: " << checkTime.value() << "ns" << std::endl;
2726 benchmarkCheck.setSilence();
2727 {
2728 std::vector<TestUtils::BranchPerfData> branchPerfData;
2730 summary.name = "total";
2731 summary.timeRead = 0;
2732 summary.timeUnpack = 0;
2733 summary.timeShallowCopy = 0;
2734 summary.entrySize = 0;
2735 summary.uncompressedSize = 0;
2736 summary.numBaskets = 0;
2737 summary.entries = std::nullopt;
2738 summary.nullEntries = std::nullopt;
2739 for (auto& column : usedColumns)
2740 {
2741 branchPerfData.push_back (column->getPerfData (emptyTime));
2742 summary.timeRead.value() += branchPerfData.back().timeRead.value_or(0);
2743 summary.timeUnpack.value() += branchPerfData.back().timeUnpack.value_or(0);
2744 summary.entrySize.value() += branchPerfData.back().entrySize.value_or(0);
2745 summary.uncompressedSize.value() += branchPerfData.back().uncompressedSize.value_or(0);
2746 summary.numBaskets.value() += branchPerfData.back().numBaskets.value_or(0);
2747 summary.timeShallowCopy.value() += branchPerfData.back().timeShallowCopy.value_or(0);
2748 }
2749 std::sort (branchPerfData.begin(), branchPerfData.end(), [] (const auto& a, const auto& b) {return a.name < b.name;});
2750 branchPerfData.insert (branchPerfData.end(), summary);
2751 const std::size_t nameWidth = std::max_element (branchPerfData.begin(), branchPerfData.end(), [] (const auto& a, const auto& b) {return a.name.size() < b.name.size();})->name.size();
2752 std::string label = userConfiguration.isrntuple ? "field name" : "branch name";
2753 std::string header = std::format ("{:{}} | read(ns) | unpack(ns) | size(B) | rate(MB/s) | compression | baskets | entries | null", label, nameWidth);
2754 std::cout << "\n" << header << std::endl;
2755 std::cout << std::string (header.size(), '-') << std::endl;
2756 for (auto& data : branchPerfData)
2757 {
2758 if (data.name == "total")
2759 std::cout << std::string (header.size(), '-') << std::endl;
2760 std::cout << std::format ("{:{}} |", data.name, nameWidth);
2761 if (data.timeRead)
2762 std::cout << std::format ("{:>9.0f} |", data.timeRead.value());
2763 else
2764 std::cout << " |";
2765 if (data.timeUnpack)
2766 std::cout << std::format ("{:>11.1f} |", data.timeUnpack.value());
2767 else
2768 std::cout << " |";
2769 if (data.entrySize)
2770 std::cout << std::format ("{:>8.1f} |", data.entrySize.value());
2771 else
2772 std::cout << " |";
2773 if (data.timeRead && data.entrySize)
2774 std::cout << std::format ("{:>11.1f} |", (data.entrySize.value() / (data.timeRead.value() * 1e-3 * 1.024 * 1.024)));
2775 else
2776 std::cout << " |";
2777 if (data.entrySize && data.uncompressedSize)
2778 std::cout << std::format ("{:>12.2f} |", float (data.uncompressedSize.value()) / data.entrySize.value());
2779 else
2780 std::cout << " |";
2781 if (data.numBaskets)
2782 std::cout << std::format ("{:>8} |", data.numBaskets.value());
2783 else
2784 std::cout << " |";
2785 if (data.entries)
2786 std::cout << std::format ("{:>8.2f} |", static_cast<float>(data.entries.value())/numberOfEvents);
2787 else
2788 std::cout << " |";
2789 if (data.nullEntries && data.entries)
2790 std::cout << std::format ("{:>4.0f}%", static_cast<float>(data.nullEntries.value()) / data.entries.value() * 100.0f);
2791 std::cout << std::endl;
2792 }
2793 }
2794 {
2795 std::vector<TestUtils::ToolPerfData> toolPerfData;
2796 for (auto& toolData : toolDataVec)
2797 {
2798 toolPerfData.emplace_back ();
2799 toolPerfData.back().name = toolData.name;
2800 toolPerfData.back().timeCall = toolData.benchmarkCall.getEntryTime (emptyTime);
2801 if (userConfiguration.runToolTwice)
2802 toolPerfData.back().timeCall2 = toolData.benchmarkCall2.getEntryTime (emptyTime);
2803 }
2804 const std::size_t nameWidth = std::max_element (toolPerfData.begin(), toolPerfData.end(), [] (const auto& a, const auto& b) {return a.name.size() < b.name.size();})->name.size();
2805 std::string header = std::format ("{:{}} | call(ns) | call2(ns)", "tool name", nameWidth);
2806 std::cout << "\n" << header << std::endl;
2807 std::cout << std::string (header.size(), '-') << std::endl;
2808 for (auto& data : toolPerfData)
2809 {
2810 std::cout << std::format ("{:{}} |", data.name, nameWidth);
2811 if (data.timeCall)
2812 std::cout << std::format ("{:>9.0f} |", data.timeCall.value());
2813 else
2814 std::cout << " |";
2815 if (data.timeCall2)
2816 std::cout << std::format ("{:>10.0f}", data.timeCall2.value());
2817 else
2818 std::cout << " ";
2819 std::cout << std::endl;
2820 }
2821 // Add totals line for multiple tools
2822 if (toolPerfData.size() > 1)
2823 {
2824 std::optional<float> totalCall, totalCall2;
2825 for (const auto& data : toolPerfData)
2826 {
2827 if (data.timeCall)
2828 totalCall = totalCall.value_or (0) + data.timeCall.value();
2829 if (data.timeCall2)
2830 totalCall2 = totalCall2.value_or (0) + data.timeCall2.value();
2831 }
2832 std::cout << std::string (header.size(), '-') << std::endl;
2833 std::cout << std::format ("{:{}} |", "total", nameWidth);
2834 if (totalCall)
2835 std::cout << std::format ("{:>9.0f} |", totalCall.value());
2836 else
2837 std::cout << " |";
2838 if (totalCall2)
2839 std::cout << std::format ("{:>10.0f}", totalCall2.value());
2840 else
2841 std::cout << " ";
2842 std::cout << std::endl;
2843 }
2844 }
2845 } else if constexpr (columnarAccessMode == 0)
2846 {
2847 TestUtils::runXaodTest (userConfiguration, testDefinitions, file.get());
2848 } else if constexpr (columnarAccessMode == 100)
2849 {
2850 const auto& testDefinition = testDefinitions[0];
2851 TestUtils::runXaodArrayTest (userConfiguration, testDefinition, file.get());
2852 }
2853 }
2854}
#define ANA_MSG_WARNING(xmsg)
Macro printing warning messages.
void checkTime()
int numberOfEvents()
static Double_t a
A number of constexpr particle constants to avoid hardcoding them directly in various places.
if(pathvar)
size_t size() const
Number of registered mappings.
Interface for all CP tools supporting systematic variations.
Class to wrap a set of SystematicVariations.
bool isDefault() const
Test to see if this link is in the default state.
a class that holds the columnar data for a single call
void checkData() const
do a basic check of the data vector
void setColumn(std::size_t columnIndex, std::size_t size, CT *dataPtr)
set the data for the given column
the header information for the entire columnar data vector
static constexpr std::size_t nullIndex
the index used for an invalid index (always has to be 0)
std::unordered_map< std::string, ColumnInfo > getAllColumnInfo() const
get all columns as a map of ColumnInfo for use with IColumnData::connect
the base class for all columnar components
this is a simple benchmarking helper class wrapping timers from std::chrono
Definition Benchmark.h:51
static float getTickDuration()
Definition Benchmark.h:86
std::optional< float > getEntryTime(float emptyTime) const
Definition Benchmark.h:74
BranchReaderArray(const std::string &val_branchName)
std::span< const T > getEntry(Long64_t entry, std::size_t size)
BranchReaderArray(const BranchReaderArray &)=delete
std::optional< float > uncompressedSize() const
BranchReaderArray & operator=(const BranchReaderArray &)=delete
const std::string & branchName() const
std::optional< float > entrySize() const
BranchReader(const BranchReader &)=delete
BranchReader(const std::string &val_branchName)
std::optional< unsigned > numBaskets()
BranchReader & operator=(const BranchReader &)=delete
std::optional< float > uncompressedSize() const
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &offsetColumns, std::unordered_map< std::string, ColumnInfo > &requestedColumns)=0
virtual void collectColumnData()=0
std::vector< OutputColumnInfo > outputColumns
virtual void setData(ColumnVectorData &columnData)=0
virtual ~IColumnData() noexcept=default
virtual void getEntry(Long64_t entry)=0
void connectColumnIndices(const ColumnVectorHeader &header)
lookup and store column indices from the header for all enabled output columns
virtual BranchPerfData getPerfData(float emptyTime)=0
std::vector< typename CM::LinkIndexType > m_columnData
const CM::LinkIndexType * data() const noexcept
void addLink(const ElementLink< T > &element, unsigned eventIndex)
std::vector< const std::vector< ColumnarOffsetType > * > m_targetOffsetColumns
void addTarget(const std::string &name, const std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &offsetColumns, std::uint32_t clid=0)
std::vector< std::vector< typename CM::LinkKeyType > > m_keysColumns
void addSplitLink(std::size_t linkIndex, SG::sgkey_t linkKey, unsigned eventIndex)
std::unordered_map< SG::sgkey_t, std::unordered_set< std::size_t > > m_unknownKeysAllowedTargets
const std::vector< typename CM::LinkKeyType > & keysColumn(std::size_t index) const
std::vector< std::string > connect(const ColumnInfo &columnInfo, const std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &offsetColumns, const std::unordered_map< std::string, ColumnInfo > &requestedColumns)
RNTFieldReader(const RNTFieldReader &)=delete
ROOT::Experimental::RNTupleInspector * m_inspector
void connectRNTuple(ROOT::RNTupleReader *reader, ROOT::Experimental::RNTupleInspector *inspector)
std::optional< float > entrySize() const
std::optional< float > uncompressedSize() const
RNTFieldReader & operator=(const RNTFieldReader &)=delete
std::unique_ptr< ROOT::RNTupleView< T > > m_view
RNTFieldReader(const std::string &val_fieldName)
std::string label(const std::string &format, int i)
Definition label.h:19
IAppMgrUI * Init(const char *options="POOLRootAccess/basic.opts")
Bootstraps (creates and configures) the Gaudi Application with the provided options file.
constexpr double muonMassInMeV
the mass of the muon (in MeV)
uint32_t sgkey_t
Type used for hashed StoreGate key+CLID pairs.
Definition sgkey_t.h:32
std::variant< TTree *, RNTupleBackend * > Backend
void runXaodArrayTest(const UserConfiguration &userConfiguration, const TestDefinition &testDefinition, TFile *file)
void runXaodTest(const UserConfiguration &userConfiguration, std::span< const TestDefinition > testDefinitions, TFile *file)
const std::unordered_map< std::string, SG::sgkey_t > knownSgKeys
lookup table from container name to its sgkey hash
Definition KnownSgKeys.h:32
constexpr unsigned columnarAccessMode
@ output
an output column
Definition ColumnInfo.h:25
void renameContainers(IColumnarTool &tool, const std::vector< std::pair< std::string, std::string > > &renames)
rename containers in the columnar tool
const std::string eventRangeColumnName
the default name for the column containing the event range
std::size_t ColumnarOffsetType
the type used for the size and offsets in the columnar data
SG::sgkey_t computeSgKey(const std::string &name, std::uint32_t clid)
compute the StoreGate hashed key for a container
constexpr ColumnarOffsetType invalidObjectIndex
the value for an invalid element index
Definition index.py:1
STL namespace.
void sort(typename DataModel_detail::iterator< DVL > beg, typename DataModel_detail::iterator< DVL > end)
Specialization of sort for DataVector/List.
std::size_t erase_if(T_container &container, T_Func pred)
@ Jet
The object is a jet.
Definition ObjectType.h:40
@ Muon
The object is a muon.
Definition ObjectType.h:48
StatusCode Init(const char *appname)
Function initialising ROOT/PyROOT for using the ATLAS EDM.
Definition Init.cxx:31
a struct that contains meta-information about each column that's needed to interface the column with ...
Definition ColumnInfo.h:36
std::string soleLinkTargetName
for simple link columns: the name of the target container
Definition ColumnInfo.h:132
std::uint32_t soleLinkTargetClid
for simple link columns: the CLID of the target container
Definition ColumnInfo.h:146
std::string name
the name of the column
Definition ColumnInfo.h:43
std::size_t LinkIndexType
the type used for columns that represent element links
static LinkIndexType mergeLinkKeyIndex(LinkIndexType key, LinkIndexType index)
merge a key and index value into a link value
columnar::TestUtils::RNTupleBackend * rntbackend
std::vector< std::shared_ptr< TestUtils::IColumnData > > knownColumns
std::vector< std::shared_ptr< TestUtils::IColumnData > > usedColumns
std::unique_ptr< ROOT::Experimental::RNTupleInspector > inspector
std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > offsetColumns
void setupKnownColumns(std::span< const TestUtils::TestDefinition > testDefinitions)
void setupColumns(const ColumnVectorHeader &columnHeader)
std::unique_ptr< ROOT::RNTupleReader > rntreader
void doCallMulti(const std::vector< TestUtils::TestDefinition > &testDefinitions)
the performance data for reading a single branch/column
virtual void setData(ColumnVectorData &columnData) override
std::array< ColumnarOffsetType, 2 > data
virtual BranchPerfData getPerfData(float) override
virtual bool connect(Backend, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
std::vector< ColumnarOffsetType > offsets
Reader< std::vector< std::string > > branchReader
ColumnDataMetNames(const std::string &val_branchName)
virtual BranchPerfData getPerfData(float emptyTime) override
virtual void getEntry(Long64_t entry) override
virtual void setData(ColumnVectorData &colData) override
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &offsetColumns, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
const std::vector< ColumnarOffsetType > * offsetColumn
virtual void setData(ColumnVectorData &columnData) override
ColumnDataOutVector(const std::string &val_columnName, const T &val_defaultValue)
virtual BranchPerfData getPerfData(float) override
std::vector< ColumnarOffsetType > namesOffsets
const std::vector< ColumnarOffsetType > * offsetColumns
virtual BranchPerfData getPerfData(float) override
std::vector< ColumnarOffsetType > offsets
virtual void setData(ColumnVectorData &colData) override
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &offsetColumns, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
ColumnDataOutputMet(const std::string &val_columnName, std::vector< std::string > val_termNames)
virtual void setData(ColumnVectorData &colData) override
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
virtual BranchPerfData getPerfData(float emptyTime) override
ColumnDataSamplingPattern(const std::string &val_branchName)
Reader< xAOD::CaloClusterContainer > branchReader
virtual void getEntry(Long64_t entry) override
virtual void setData(ColumnVectorData &columnData) override
ColumnDataScalar(const std::string &val_branchName)
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
virtual void getEntry(Long64_t entry) override
virtual BranchPerfData getPerfData(float emptyTime) override
ColumnDataVectorVectorVector(const std::string &val_branchName)
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
virtual BranchPerfData getPerfData(float emptyTime) override
Reader< std::vector< std::vector< std::vector< T > > > > branchReader
virtual void setData(ColumnVectorData &colData) override
virtual BranchPerfData getPerfData(float emptyTime) override
Reader< std::vector< std::vector< T > > > branchReader
virtual void setData(ColumnVectorData &colData) override
virtual void getEntry(Long64_t entry) override
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
ColumnDataVectorVector(const std::string &val_branchName)
virtual void getEntry(Long64_t entry) override
virtual bool connect(Backend source, std::unordered_map< std::string, const std::vector< ColumnarOffsetType > * > &offsetColumns, std::unordered_map< std::string, ColumnInfo > &requestedColumns) override
const std::vector< ColumnarOffsetType > * offsetColumn
std::vector< ColumnarOffsetType > offsets
ColumnDataVector(const std::string &val_branchName)
virtual void setData(ColumnVectorData &columnData) override
virtual BranchPerfData getPerfData(float emptyTime) override
ROOT::Experimental::RNTupleInspector * inspector
std::vector< std::string > metTermNames
the MET output term names (if empty, MET output columns are omitted)
std::vector< std::pair< std::string, std::string > > containerRenames
the container name remappings to apply
static UserConfiguration fromEnvironment()
create a UserConfiguration, loading from the file pointed to by the COLUMNAR_TEST_CONFIG environment ...
TChain * tree