31 #ifndef XAOD_STANDALONE
45 #include <gtest/gtest.h>
59 const unsigned int batchSize = 1000;
66 std::chrono::time_point<std::chrono::high_resolution_clock>
m_start;
81 Benchmark (
const std::string& val_name,
unsigned val_batchSize = 1)
96 std::optional<float> getEntryTime (
float emptyTime)
const
103 auto getTotalTime ()
const
125 static const std::unordered_map<std::string,SG::sgkey_t> knownKeys =
127 {
"AnalysisMuons", 0x3a6b126f},
128 {
"AnalysisElectrons", 0x3902fec0},
129 {
"AnalysisPhotons", 0x35d1472f},
130 {
"AnalysisJets", 0x1afd1919},
131 {
"egammaClusters", 0x15788d1f},
132 {
"GSFConversionVertices", 0x1f3e85c9},
133 {
"InDetTrackParticles", 0x1d3890db},
134 {
"CombinedMuonTrackParticles", 0x340d9196},
135 {
"ExtrapolatedMuonTrackParticles", 0x14e35e9f},
136 {
"GSFTrackParticles", 0x2e42db0b},
137 {
"InDetForwardTrackParticles", 0x143c6846},
138 {
"MuonSpectrometerTrackParticles", 0x3993c8f3},
201 else if (
columnName.find (
".") != std::string::npos)
202 throw std::runtime_error (
"branch name does not contain AuxDyn or Aux: " +
m_branchName);
215 throw std::runtime_error (
"branch name does not contain AuxDyn or Aux: " +
m_branchName);
222 throw std::runtime_error (
"failed to get branch: " +
m_branchName);
233 throw std::runtime_error (
"branch not connected: " +
m_branchName);
237 throw std::runtime_error (
"got nullptr reading data for branch: " +
m_branchName);
250 return static_cast<float>(
m_branch->GetZipBytes()) /
m_branch->GetEntries();
257 return static_cast<float>(
m_branch->GetTotBytes()) /
m_branch->GetEntries();
266 return m_branch->GetListOfBaskets()->GetSize();
293 else if (
columnName.find (
".") != std::string::npos)
294 throw std::runtime_error (
"branch name does not contain AuxDyn or Aux: " +
m_branchName);
307 throw std::runtime_error (
"branch name does not contain AuxDyn or Aux: " +
m_branchName);
314 throw std::runtime_error (
"failed to get branch: " +
m_branchName);
323 throw std::runtime_error (
"branch not connected: " +
m_branchName);
338 return static_cast<float>(
m_branch->GetZipBytes()) /
m_branch->GetEntries();
345 return static_cast<float>(
m_branch->GetTotBytes()) /
m_branch->GetEntries();
354 return m_branch->GetListOfBaskets()->GetSize();
386 std::array<ColumnarOffsetType, 2>
data = {0, 0};
393 virtual bool connect (TTree * , std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& , std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
424 result.name =
"EventCount(auto)";
438 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+
"(unpack)"), benchmark (branchReader.columnName())
443 virtual bool connect (TTree *
tree, std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& , std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
446 if (
iter == requestedColumns.end())
449 requestedColumns.erase (
iter);
463 benchmark.startTimer ();
465 benchmark.stopTimer ();
466 benchmarkUnpack.startTimer ();
467 outData.push_back (branchData);
468 benchmarkUnpack.stopTimer ();
481 result.timeRead = benchmark.getEntryTime(emptyTime);
482 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
483 benchmark.setSilence();
484 benchmarkUnpack.setSilence();
496 const std::vector<ColumnarOffsetType>* offsetColumn =
nullptr;
497 std::vector<ColumnarOffsetType> offsets = {0};
503 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+
"(unpack)"), benchmark (branchReader.columnName())
509 virtual bool connect (TTree *
tree, std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
512 if (
iter == requestedColumns.end())
519 throw std::runtime_error (
"offset name mismatch: " +
iter->second.offsetName +
" != " +
outputColumns.at(1).name);
521 requestedColumns.erase (
iter);
523 if (
auto offsetIter = offsetColumns.find (
outputColumns.at(1).name); offsetIter != offsetColumns.end())
524 offsetColumn = offsetIter->second;
529 if (
iter != requestedColumns.end())
531 requestedColumns.erase (
iter);
541 offsets.push_back (0);
547 benchmark.startTimer ();
549 benchmark.stopTimer ();
550 benchmarkUnpack.startTimer ();
551 outData.insert (outData.end(), branchData.begin(), branchData.end());
552 offsets.push_back (outData.size());
553 benchmarkUnpack.stopTimer ();
564 if (offsetColumn->size() != offsets.size())
565 throw std::runtime_error (
"offset column not filled yet: " +
outputColumns.at(1).name);
566 if (offsetColumn->back() != offsets.back())
567 throw std::runtime_error (
"offset column does not match: " +
outputColumns.at(1).name);
575 result.timeRead = benchmark.getEntryTime(emptyTime);
576 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
577 benchmark.setSilence();
578 benchmarkUnpack.setSilence();
590 const std::vector<ColumnarOffsetType>* offsetColumn =
nullptr;
594 : defaultValue (val_defaultValue)
599 virtual bool connect (TTree * , std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
602 if (
iter == requestedColumns.end())
608 const auto offsetName =
iter->second.offsetName;
609 if (offsetName.empty())
610 throw std::runtime_error (
"missing offset column for: " +
outputColumns.at(0).name);
612 requestedColumns.erase (
iter);
614 if (
auto offsetIter = offsetColumns.find (offsetName); offsetIter != offsetColumns.end())
615 offsetColumn = offsetIter->second;
617 throw std::runtime_error (
"missing offset column for: " +
outputColumns.at(0).name);
628 outData.resize (offsetColumn->back(), defaultValue);
649 std::vector<ColumnarOffsetType> offsets = {0};
655 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+
"(unpack)"), benchmark (branchReader.columnName())
661 virtual bool connect (TTree *
tree, std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& , std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
664 if (
iter == requestedColumns.end())
671 throw std::runtime_error (
"offset name mismatch: " +
iter->second.offsetName +
" != " +
outputColumns.at(1).name);
673 requestedColumns.erase (
iter);
676 if (
iter == requestedColumns.end())
678 requestedColumns.erase (
iter);
687 offsets.push_back (0);
692 benchmark.startTimer ();
694 benchmark.stopTimer ();
695 benchmarkUnpack.startTimer ();
696 for (
auto&
data : branchData)
698 columnData.insert (columnData.end(),
data.begin(),
data.end());
699 offsets.push_back (columnData.size());
701 benchmarkUnpack.stopTimer ();
716 result.timeRead = benchmark.getEntryTime(emptyTime);
717 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
718 benchmark.setSilence();
719 benchmarkUnpack.setSilence();
732 std::vector<ColumnarOffsetType> offsets = {0};
734 const std::vector<ColumnarOffsetType>* targetOffsetColumn =
nullptr;
741 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+
"(unpack)"), benchmark (branchReader.columnName())
747 virtual bool connect (TTree *
tree, std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
750 if (
iter == requestedColumns.end())
757 throw std::runtime_error (
"offset name mismatch: " +
iter->second.offsetName +
" != " +
outputColumns.at(1).name);
759 if (
iter->second.linkTargetNames.size() != 1)
760 throw std::runtime_error (
"expected exactly one link target name for: " +
outputColumns.at(0).name);
761 targetContainerName =
iter->second.linkTargetNames.at(0);
762 if (
auto keyIter = knownKeys.find (targetContainerName); keyIter != knownKeys.end())
763 targetKey = keyIter->second;
764 if (
auto offsetIter = offsetColumns.find (
iter->second.linkTargetNames.at(0)); offsetIter != offsetColumns.end())
765 targetOffsetColumn = offsetIter->second;
767 throw std::runtime_error (
"missing offset column: " +
iter->second.linkTargetNames.at(0));
769 requestedColumns.erase (
iter);
772 if (
iter == requestedColumns.end())
774 requestedColumns.erase (
iter);
783 offsets.push_back (0);
788 benchmark.startTimer ();
790 benchmark.stopTimer ();
791 benchmarkUnpack.startTimer ();
792 if (targetOffsetColumn->size() < 2)
793 throw std::runtime_error (
"target offset column not yet filled for: " +
outputColumns.at(0).name);
794 for (
auto&
data : branchData)
796 for (
auto& element :
data)
798 if (element.isDefault() || (element.key() == 0 && element.index() == 0))
802 columnData.push_back (element.index() + targetOffsetColumn->at (targetOffsetColumn->size()-2));
803 if (element.key() != targetKey)
807 targetKey = element.key();
808 std::cout <<
"assume target key for " << targetContainerName <<
" is " << std::hex << targetKey << std::dec << std::endl;
811 throw std::runtime_error(
812 std::format(
"target key mismatch: {:x} != {:x} for {} with element index {}",
813 element.key(), targetKey,
outputColumns.at(0).name, element.index())
819 offsets.push_back (columnData.size());
821 benchmarkUnpack.stopTimer ();
836 result.timeRead = benchmark.getEntryTime(emptyTime);
837 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
838 benchmark.setSilence();
839 benchmarkUnpack.setSilence();
852 std::vector<ColumnarOffsetType> outerOffsets = {0};
853 std::vector<ColumnarOffsetType> innerOffsets = {0};
859 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+
"(unpack)"), benchmark (branchReader.columnName())
866 virtual bool connect (TTree *
tree, std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& , std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
869 if (
iter == requestedColumns.end())
876 throw std::runtime_error (
"offset name mismatch: " +
iter->second.offsetName +
" != " +
outputColumns.at(1).name);
878 requestedColumns.erase (
iter);
881 if (
iter == requestedColumns.end())
886 throw std::runtime_error (
"offset name mismatch: " +
iter->second.offsetName +
" != " +
outputColumns.at(2).name);
888 requestedColumns.erase (
iter);
891 if (
iter == requestedColumns.end())
894 requestedColumns.erase (
iter);
901 innerOffsets.clear();
902 innerOffsets.push_back (0);
903 outerOffsets.clear();
904 outerOffsets.push_back (0);
909 benchmark.startTimer ();
911 benchmark.stopTimer ();
912 benchmarkUnpack.startTimer ();
913 for (
auto& outerData : branchData)
915 for (
auto& innerData : outerData)
917 columnData.insert (columnData.end(), innerData.begin(), innerData.end());
918 innerOffsets.push_back (columnData.size());
920 outerOffsets.push_back (innerOffsets.size()-1);
922 benchmarkUnpack.stopTimer ();
930 tool.setColumn (
outputColumns.at(1).name, innerOffsets.size(), innerOffsets.data());
932 tool.setColumn (
outputColumns.at(2).name, outerOffsets.size(), outerOffsets.data());
939 result.timeRead = benchmark.getEntryTime(emptyTime);
940 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
941 benchmark.setSilence();
942 benchmarkUnpack.setSilence();
955 const std::vector<ColumnarOffsetType>* offsetColumn =
nullptr;
956 std::vector<ColumnarOffsetType> offsets = {0};
958 const std::vector<ColumnarOffsetType>* targetOffsetColumn =
nullptr;
965 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+
"(unpack)"), benchmark (branchReader.columnName())
971 virtual bool connect (TTree *
tree, std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
974 if (
iter == requestedColumns.end())
981 throw std::runtime_error (
"offset name mismatch: " +
iter->second.offsetName +
" != " +
outputColumns.at(1).name);
983 if (
iter->second.linkTargetNames.size() != 1)
984 throw std::runtime_error (
"expected exactly one link target name for: " +
outputColumns.at(0).name);
985 targetContainerName =
iter->second.linkTargetNames.at(0);
986 if (
auto keyIter = knownKeys.find (targetContainerName); keyIter != knownKeys.end())
987 targetKey = keyIter->second;
988 if (
auto targetOffsetIter = offsetColumns.find (
iter->second.linkTargetNames.at(0)); targetOffsetIter != offsetColumns.end())
989 targetOffsetColumn = targetOffsetIter->second;
991 throw std::runtime_error (
"missing offset column(vector-link): " +
iter->second.linkTargetNames.at(0));
993 requestedColumns.erase (
iter);
995 if (
auto offsetIter = offsetColumns.find (
outputColumns.at(1).name); offsetIter != offsetColumns.end())
996 offsetColumn = offsetIter->second;
1001 if (
iter != requestedColumns.end())
1004 requestedColumns.erase (
iter);
1014 offsets.push_back (0);
1019 benchmark.startTimer ();
1021 benchmark.stopTimer ();
1022 benchmarkUnpack.startTimer ();
1023 if (targetOffsetColumn->size() < 2)
1024 throw std::runtime_error (
"target offset column not yet filled for: " +
outputColumns.at(0).name);
1025 for (
auto& element : branchData)
1027 if (element.isDefault())
1031 columnData.push_back (element.index() + targetOffsetColumn->at (targetOffsetColumn->size()-2));
1032 if (element.key() != targetKey)
1036 targetKey = element.key();
1037 std::cout <<
"assume target key for " << targetContainerName <<
" is " << std::hex << targetKey << std::dec << std::endl;
1045 offsets.push_back (columnData.size());
1048 if (offsetColumn->size() != offsets.size())
1049 throw std::runtime_error (
"offset column not filled yet: " +
outputColumns.at(1).name);
1050 if (offsetColumn->back() != offsets.back())
1051 throw std::runtime_error (
"offset column does not match: " +
outputColumns.at(1).name);
1053 benchmarkUnpack.stopTimer ();
1059 tool.setColumn (
outputColumns.at(0).name, columnData.size(), columnData.data());
1068 result.timeRead = benchmark.getEntryTime(emptyTime);
1069 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1070 benchmark.setSilence();
1071 benchmarkUnpack.setSilence();
1079 template<
typename T>
1086 const std::vector<ColumnarOffsetType>* offsetColumn =
nullptr;
1087 std::vector<ColumnarOffsetType> offsets = {0};
1096 : branchReaderSize (val_branchName), branchReaderKey (val_branchName +
".m_persKey"), branchReaderIndex (val_branchName +
".m_persIndex"), benchmarkUnpack (branchReaderSize.columnName()+
"(unpack)"), benchmark (branchReaderSize.columnName())
1103 virtual bool connect (TTree *
tree, std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
1106 if (
iter == requestedColumns.end())
1115 throw std::runtime_error (
"offset name mismatch: " +
iter->second.offsetName +
" != " +
outputColumns.at(1).name);
1117 const auto& linkContainers =
iter->second.linkTargetNames;
1118 for (
const auto& container : linkContainers)
1120 if (
auto keyIter = knownKeys.find (container); keyIter != knownKeys.end())
1121 targetKeys.push_back (keyIter->second);
1123 throw std::runtime_error (
"no key known for link container: " + container);
1124 if (
auto targetOffsetIter = offsetColumns.find (container); targetOffsetIter != offsetColumns.end())
1125 targetOffsetColumns.push_back (targetOffsetIter->second);
1127 throw std::runtime_error (
"missing offset column: " + container);
1128 keyColumnData.push_back (keyColumnData.size());
1130 requestedColumns.erase (
iter);
1132 if (
auto offsetIter = offsetColumns.find (
outputColumns.at(1).name); offsetIter != offsetColumns.end())
1133 offsetColumn = offsetIter->second;
1138 if (
iter != requestedColumns.end())
1141 requestedColumns.erase (
iter);
1145 if (
iter != requestedColumns.end())
1148 requestedColumns.erase (
iter);
1158 offsets.push_back (0);
1163 benchmark.startTimer ();
1164 std::size_t branchDataSize = branchReaderSize.
getEntry (
entry);
1165 auto branchDataKey = branchReaderKey.
getEntry (
entry, branchDataSize);
1166 auto branchDataIndex = branchReaderIndex.
getEntry (
entry, branchDataSize);
1167 benchmark.stopTimer ();
1168 benchmarkUnpack.startTimer ();
1169 for (
auto& targetOffsetColumn : targetOffsetColumns)
1171 if (targetOffsetColumn->size() <= offsets.size())
1172 throw std::runtime_error (
"target offset column not yet filled for: " +
outputColumns.at(0).name);
1176 if (branchDataIndex[
index] ==
static_cast<UInt_t
>(-1))
1181 if (
auto keyIter =
std::find(targetKeys.begin(), targetKeys.end(), branchDataKey[
index]); keyIter != targetKeys.end())
1184 }
else if (targetKeys.empty())
1186 targetKeys.push_back (branchDataKey[
index]);
1188 std::cout <<
"assume target key for " <<
outputColumns.at(0).name <<
" is " << std::hex << branchDataKey[
index] << std::dec << std::endl;
1189 }
else if (branchDataKey[
index] != 0)
1191 std::ostringstream
error;
1192 error <<
"target key mismatch: read " << std::hex << branchDataKey[
index];
1193 error <<
", expected one of";
1194 for (
const auto&
key : targetKeys)
1197 throw std::runtime_error (std::move (
error).
str());
1199 if (keyIndex == CM::invalidLinkValue)
1201 columnData.push_back (CM::invalidLinkValue);
1204 auto& targetOffsetColumn = *targetOffsetColumns.at(keyIndex);
1205 auto targetOffset = targetOffsetColumn.at (offsets.size()-1);
1208 if (
linkIndex >= targetOffsetColumn.at(offsets.size()))
1209 throw std::runtime_error (
std::format (
"index out of range for link: {} >= {} (base index {})",
outputColumns.at(0).name,
linkIndex, targetOffsetColumn.at(offsets.size()), targetOffset));
1210 columnData.push_back (CM::mergeLinkKeyIndex (keyIndex, branchDataIndex[
index] + targetOffset));
1214 offsets.push_back (columnData.size());
1217 if (offsetColumn->size() != offsets.size())
1218 throw std::runtime_error (
"offset column not filled yet: " +
outputColumns.at(1).name);
1219 if (offsetColumn->back() != offsets.back())
1220 throw std::runtime_error (
"offset column does not match: " +
outputColumns.at(1).name);
1222 benchmarkUnpack.stopTimer ();
1228 tool.setColumn (
outputColumns.at(0).name, columnData.size(), columnData.data());
1232 tool.setColumn (
outputColumns.at(2).name, keyColumnData.size(), keyColumnData.data());
1239 result.timeRead = benchmark.getEntryTime(emptyTime);
1240 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1241 benchmark.setSilence();
1242 benchmarkUnpack.setSilence();
1250 template<
typename T>
1255 std::vector<ColumnarOffsetType> offsets = {0};
1264 bool checkUnknownKeys =
false;
1265 std::unordered_map<SG::sgkey_t,std::unordered_set<std::string>>
unknownKeys;
1268 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+
"(unpack)"), benchmark (branchReader.columnName())
1280 for (
auto& [
key, forbiddenContainer] : unknownKeys)
1282 std::cout <<
"unknown key: " << std::hex <<
key << std::dec <<
", allowed containers:";
1285 if (forbiddenContainer.find (container) == forbiddenContainer.end())
1286 std::cout <<
" " << container;
1288 std::cout << std::endl;
1292 virtual bool connect (TTree *
tree, std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
1295 if (
iter == requestedColumns.end())
1302 throw std::runtime_error (
"offset name mismatch: " +
iter->second.offsetName +
" != " +
outputColumns.at(1).name);
1304 if (
containers.empty() ||
iter->second.variantLinkKeyColumn.empty())
1305 throw std::runtime_error (
"no variant link containers for: " +
outputColumns.at(0).name);
1307 throw std::runtime_error (
"variant link key column mismatch: " +
iter->second.variantLinkKeyColumn +
" != " +
outputColumns.at(2).name);
1309 for ([[maybe_unused]]
auto& container :
containers)
1311 keysColumn.push_back (keysColumn.size()+1);
1312 if (!offsetColumns.contains (container))
1313 throw std::runtime_error (
"missing offset column(variant-link): " + container);
1314 containerOffsets.push_back (offsetColumns.at (container));
1315 if (
auto iter = knownKeys.find (container);
iter != knownKeys.end())
1317 containerKeys.push_back (
iter->second);
1320 checkUnknownKeys =
true;
1321 containerKeys.push_back (0
u);
1325 requestedColumns.erase (
iter);
1328 if (
iter != requestedColumns.end())
1331 requestedColumns.erase (
iter);
1335 if (
iter != requestedColumns.end())
1338 requestedColumns.erase (
iter);
1347 offsets.push_back (0);
1352 benchmark.startTimer ();
1354 benchmark.stopTimer ();
1355 benchmarkUnpack.startTimer ();
1356 for (
auto&
data : branchData)
1358 for (
auto& element :
data)
1360 if (element.isDefault())
1368 if (element.key() == containerKeys[
i])
1370 if (containerOffsets[
i]->back() <= element.index())
1372 key = keysColumn[
i];
1373 if (containerOffsets[
i]->
size() < 2)
1374 throw std::runtime_error (
"container offset not yet filled for: " +
containers[
i]);
1375 index = containerOffsets[
i]->at (containerOffsets[
i]->
size()-2) + element.index();
1379 if (
key == 0xff && checkUnknownKeys)
1384 auto& forbiddenContainers = unknownKeys[element.key()];
1387 if (containerOffsets[
i]->back() <= containerOffsets[
i]->at (containerOffsets[
i]->
size()-2) + element.index())
1391 columnData.push_back (CM::mergeLinkKeyIndex (
key,
index));
1394 offsets.push_back (columnData.size());
1396 benchmarkUnpack.stopTimer ();
1402 tool.setColumn (
outputColumns.at(0).name, columnData.size(), columnData.data());
1406 tool.setColumn (
outputColumns.at(2).name, keysColumn.size(), keysColumn.data());
1413 result.timeRead = benchmark.getEntryTime(emptyTime);
1414 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1415 benchmark.setSilence();
1416 benchmarkUnpack.setSilence();
1427 std::vector<ColumnarOffsetType> offsets = {0};
1434 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+
"(unpack)"), benchmark (branchReader.columnName())
1441 virtual bool connect (TTree *
tree, std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& , std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
1444 if (
iter == requestedColumns.end())
1451 throw std::runtime_error (
"offset name mismatch: " +
iter->second.offsetName +
" != " +
outputColumns.at(1).name);
1453 requestedColumns.erase (
iter);
1456 if (
iter == requestedColumns.end())
1461 requestedColumns.erase (
iter);
1464 if (
iter != requestedColumns.end())
1467 requestedColumns.erase (
iter);
1476 offsets.push_back (0);
1477 columnHashData.clear();
1482 benchmark.startTimer ();
1484 benchmark.stopTimer ();
1485 benchmarkUnpack.startTimer ();
1486 for (
auto&
data : branchData)
1488 columnData.insert (columnData.end(),
data.begin(),
data.end());
1489 offsets.push_back (columnData.size());
1490 columnHashData.push_back (std::hash<std::string> () (
data));
1492 benchmarkUnpack.stopTimer ();
1498 tool.setColumn (
outputColumns.at(0).name, columnData.size(), columnData.data());
1502 tool.setColumn (
outputColumns.at(2).name, columnHashData.size(), columnHashData.data());
1509 result.timeRead = benchmark.getEntryTime(emptyTime);
1510 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1511 benchmark.setSilence();
1512 benchmarkUnpack.setSilence();
1523 const std::vector<ColumnarOffsetType>* offsetColumns =
nullptr;
1524 std::vector<ColumnarOffsetType> offsets = {0};
1525 std::vector<ColumnarOffsetType> namesOffsets = {0};
1530 : termNames (std::move (val_termNames))
1532 outputColumns.push_back ({.name = val_columnName, .isOffset =
true});
1533 outputColumns.push_back ({.name = val_columnName +
".name.data"});
1534 outputColumns.push_back ({.name = val_columnName +
".name.offset", .isOffset =
true});
1535 outputColumns.push_back ({.name = val_columnName +
".nameHash"});
1538 virtual bool connect (TTree * , std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& offsetColumns, std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
1541 iter != requestedColumns.end())
1542 requestedColumns.erase (
iter);
1548 iter != requestedColumns.end())
1551 requestedColumns.erase (
iter);
1555 iter != requestedColumns.end())
1558 requestedColumns.erase (
iter);
1562 iter != requestedColumns.end())
1565 requestedColumns.erase (
iter);
1568 if (
auto offsetIter = offsetColumns.find (
outputColumns.at(0).name); offsetIter != offsetColumns.end())
1569 throw std::runtime_error (
"duplicate size column: " +
outputColumns.at(0).name);
1578 offsets.push_back (0);
1580 namesOffsets.clear ();
1581 namesOffsets.push_back (0);
1587 for (
const auto& termName : termNames)
1589 namesData.insert (namesData.end(), termName.begin(), termName.end());
1590 namesOffsets.push_back (namesData.size());
1591 namesHash.push_back (std::hash<std::string> () (termName));
1593 offsets.push_back (namesHash.size());
1603 tool.setColumn (
outputColumns.at(2).name, namesOffsets.size(), namesOffsets.data());
1619 std::vector<ColumnarOffsetType> offsets = {0};
1625 : branchReader (val_branchName), benchmarkUnpack (branchReader.columnName()+
".samplingPattern(fallback)(unpack)"), benchmark (branchReader.columnName() +
".samplingPattern(fallback)")
1631 virtual bool connect (TTree *
tree, std::unordered_map<std::string,
const std::vector<ColumnarOffsetType>*>& , std::unordered_map<std::string,ColumnInfo>& requestedColumns)
override
1634 if (
iter == requestedColumns.end())
1641 throw std::runtime_error (
"offset name mismatch: " +
iter->second.offsetName +
" != " +
outputColumns.at(1).name);
1643 requestedColumns.erase (
iter);
1646 if (
iter == requestedColumns.end())
1651 requestedColumns.erase (
iter);
1659 offsets.push_back (0);
1664 benchmark.startTimer ();
1666 benchmark.stopTimer ();
1667 benchmarkUnpack.startTimer ();
1668 for (
auto data : branchData)
1670 columnData.push_back (
data->samplingPattern());
1672 offsets.push_back (columnData.size());
1673 benchmarkUnpack.stopTimer ();
1679 tool.setColumn (
outputColumns.at(0).name, columnData.size(), columnData.data());
1688 result.timeRead = benchmark.getEntryTime(emptyTime);
1689 result.timeUnpack = benchmarkUnpack.getEntryTime(emptyTime);
1690 benchmark.setSilence();
1691 benchmarkUnpack.setSilence();
1704 static std::once_flag
flag;
1705 std::call_once (
flag, [] ()
1707 #ifdef XAOD_STANDALONE
1716 throw std::runtime_error (
"missing ASG_TEST_FILE_LITE_MC");
1719 throw std::runtime_error (
"failed to open file");
1720 tree =
dynamic_cast<TTree*
> (
file->Get (
"CollectionTree"));
1722 throw std::runtime_error (
"failed to open tree");
1729 static std::atomic<unsigned>
index = 0;
1743 knownColumns.push_back (std::make_shared<ColumnDataEventCount> ());
1745 tree->SetMakeClass (1);
1747 std::unordered_map<std::string,TBranch*>
branches;
1749 TIter branchIter (
tree->GetListOfBranches());
1750 TObject *
obj =
nullptr;
1751 while ((
obj = branchIter()))
1753 TBranch *
branch =
nullptr;
1754 if ((
branch =
dynamic_cast<TBranch*
>(
obj)))
1757 TIter subBranchIter (
branch->GetListOfBranches());
1758 while ((
obj = subBranchIter()))
1760 if (
auto subBranch =
dynamic_cast<TBranch*
>(
obj))
1761 branches.emplace (subBranch->GetName(), subBranch);
1769 if (
name.find (
"AuxDyn.") != std::string::npos ||
1770 name.find (
"Aux.") != std::string::npos)
1772 TClass *branchClass =
nullptr;
1773 EDataType branchType {};
1774 branch->GetExpectedType (branchClass, branchType);
1775 if (branchClass ==
nullptr)
1780 knownColumns.push_back (std::make_shared<ColumnDataScalar<std::int32_t>> (
branch->GetName()));
1783 knownColumns.push_back (std::make_shared<ColumnDataScalar<std::uint32_t>> (
branch->GetName()));
1786 knownColumns.push_back (std::make_shared<ColumnDataScalar<std::uint64_t>> (
branch->GetName()));
1789 knownColumns.push_back (std::make_shared<ColumnDataScalar<std::uint64_t>> (
branch->GetName()));
1792 knownColumns.push_back (std::make_shared<ColumnDataScalar<float>> (
branch->GetName()));
1800 if (*branchClass->GetTypeInfo() ==
typeid(std::vector<float>))
1802 knownColumns.push_back (std::make_shared<ColumnDataVector<float>> (
branch->GetName()));
1803 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<char>))
1805 knownColumns.push_back (std::make_shared<ColumnDataVector<char>> (
branch->GetName()));
1806 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::int8_t>))
1808 knownColumns.push_back (std::make_shared<ColumnDataVector<std::int8_t>> (
branch->GetName()));
1809 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::uint8_t>))
1811 knownColumns.push_back (std::make_shared<ColumnDataVector<std::uint8_t>> (
branch->GetName()));
1812 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::int16_t>))
1814 knownColumns.push_back (std::make_shared<ColumnDataVector<std::int16_t>> (
branch->GetName()));
1815 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::uint16_t>))
1817 knownColumns.push_back (std::make_shared<ColumnDataVector<std::uint16_t>> (
branch->GetName()));
1818 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::int32_t>))
1820 knownColumns.push_back (std::make_shared<ColumnDataVector<std::int32_t>> (
branch->GetName()));
1821 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::uint32_t>))
1823 knownColumns.push_back (std::make_shared<ColumnDataVector<std::uint32_t>> (
branch->GetName()));
1824 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::int64_t>))
1826 knownColumns.push_back (std::make_shared<ColumnDataVector<std::int64_t>> (
branch->GetName()));
1827 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::uint64_t>))
1829 knownColumns.push_back (std::make_shared<ColumnDataVector<std::uint64_t>> (
branch->GetName()));
1830 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::vector<float>>))
1832 knownColumns.push_back (std::make_shared<ColumnDataVectorVector<float>> (
branch->GetName()));
1833 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::vector<std::int32_t>>))
1835 knownColumns.push_back (std::make_shared<ColumnDataVectorVector<std::int32_t>> (
branch->GetName()));
1836 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::vector<std::uint64_t>>))
1838 knownColumns.push_back (std::make_shared<ColumnDataVectorVector<std::uint64_t>> (
branch->GetName()));
1839 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::vector<std::vector<std::size_t>>>))
1841 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorVector<std::size_t>> (
branch->GetName()));
1842 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::vector<std::vector<unsigned char>>>))
1844 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorVector<unsigned char>> (
branch->GetName()));
1845 }
else if (*branchClass->GetTypeInfo() ==
typeid(std::vector<std::string>))
1847 knownColumns.push_back (std::make_shared<ColumnDataMetNames> (
branch->GetName()));
1859 knownColumns.push_back (std::make_shared<ColumnDataSamplingPattern> (
"egammaClusters"));
1864 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorLink<xAOD::CaloClusterContainer>> (
"AnalysisElectronsAuxDyn.caloClusterLinks"));
1865 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorLink<xAOD::TrackParticleContainer>> (
"AnalysisElectronsAuxDyn.trackParticleLinks"));
1866 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorLink<xAOD::CaloClusterContainer>> (
"AnalysisPhotonsAuxDyn.caloClusterLinks"));
1867 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorLink<xAOD::VertexContainer>> (
"AnalysisPhotonsAuxDyn.vertexLinks"));
1868 knownColumns.push_back (std::make_shared<ColumnDataVectorSplitLink<xAOD::TrackParticleContainer>> (
"AnalysisMuonsAuxDyn.inDetTrackParticleLink"));
1869 knownColumns.push_back (std::make_shared<ColumnDataVectorSplitLink<xAOD::TrackParticleContainer>> (
"AnalysisMuonsAuxDyn.combinedTrackParticleLink"));
1870 knownColumns.push_back (std::make_shared<ColumnDataVectorSplitLink<xAOD::TrackParticleContainer>> (
"AnalysisMuonsAuxDyn.extrapolatedMuonSpectrometerTrackParticleLink"));
1871 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorLink<xAOD::TrackParticleContainer>> (
"GSFConversionVerticesAuxDyn.trackParticleLinks"));
1872 knownColumns.push_back (std::make_shared<ColumnDataVectorSplitLink<xAOD::TrackParticleContainer>> (
"GSFTrackParticlesAuxDyn.originalTrackParticle"));
1873 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorVariantLink<xAOD::IParticleContainer>>(
"AnalysisJetsAuxDyn.GhostTrack"));
1874 knownColumns.push_back (std::make_shared<ColumnDataVectorLink<xAOD::JetContainer>>(
"METAssoc_AnalysisMETAux.jetLink"));
1875 knownColumns.push_back (std::make_shared<ColumnDataVectorVectorVariantLink<xAOD::IParticleContainer>>(
"METAssoc_AnalysisMETAux.objectLinks"));
1879 knownColumns.push_back (std::make_shared<ColumnDataOutputMet> (
"OutputMET", std::vector<std::string>{
"Muons",
"RefJet",
"MuonEloss",
"PVSoftTrk"}));
1883 knownColumns.push_back (std::make_shared<ColumnDataOutVector<std::uint16_t>> (
"AnalysisMuons.objectType",
xAOD::Type::Muon));
1885 knownColumns.push_back (std::make_shared<ColumnDataOutVector<std::uint16_t>> (
"AnalysisJets.objectType",
xAOD::Type::Jet));
1890 knownColumns.push_back (std::make_shared<ColumnDataOutVector<float>> (
"AnalysisMuons.MetObjectWeight", 0));
1891 knownColumns.push_back (std::make_shared<ColumnDataOutVector<float>> (
"AnalysisJets.MetObjectWeight", 0));
1892 knownColumns.push_back (std::make_shared<ColumnDataOutVector<float>> (
"AnalysisJets.MetObjectWeightSoft", 0));
1893 knownColumns.push_back (std::make_shared<ColumnDataOutVector<MissingETBase::Types::bitmask_t>> (
"METAssoc_AnalysisMET.useObjectFlags", 0));
// Make the asg::msgUserCode names available in this scope.
1898 using namespace asg::msgUserCode;
// Columns requested for this run, keyed by column name.  How the map gets
// filled is on lines not visible in this excerpt.
1900 std::unordered_map<std::string,ColumnInfo> requestedColumns;
1905 std::cout <<
"requested columns: " <<
name << std::endl;
// Offer the requested columns to every known column; those whose connect()
// returns true are remembered in usedColumns.
1907 for (
auto&
column : knownColumns)
1909 if (
column->connect (
tree, offsetColumns, requestedColumns))
1910 usedColumns.push_back (
column);
// Walk over what is still in requestedColumns after the connection pass
// (presumably connect() removes the entries it claims -- TODO confirm).
// Non-optional entries are collected in unclaimedColumns; the other branch
// (its `else` is not visible here) only prints a notice.
1913 std::set<std::string> unclaimedColumns;
1914 for (
auto&
column : requestedColumns)
1916 if (!
column.second.isOptional)
1917 unclaimedColumns.insert (
column.first);
1919 std::cout <<
"optional column not claimed: " <<
column.first << std::endl;
// std::erase_if removes every name for which the lambda returns true.  The
// lambda's return statements are not visible in this excerpt; from the
// visible lines it tries to create a dynamic output column for the name.
1921 std::erase_if (unclaimedColumns, [&] (
auto& columnName)
1923 const auto&
info = requestedColumns.at (columnName);
// Search the used columns for one that exposes an output column named
// info.offsetName.
1926 auto offsetIter = std::find_if (usedColumns.begin(), usedColumns.end(), [&] (
const std::shared_ptr<TestUtils::IColumnData>&
column)
1928 for (auto& output : column->outputColumns)
1930 if (output.name == info.offsetName)
// No used column provides that offset; what happens in this case is on a
// line not shown.
1935 if (offsetIter == usedColumns.end())
// Choose the column implementation from the requested element type.  The
// visible branches cover float and char (the assignments to myColumn are on
// lines not shown); other types end up at the warning below.
1937 std::shared_ptr<TestUtils::IColumnData> myColumn;
1938 if (*
info.type ==
typeid(
float))
1940 else if (*
info.type ==
typeid(
char))
1948 ANA_MSG_WARNING (
"unhandled column type: " << info.name <<
" " << info.type->name());
// Register the freshly created column and connect it like the known ones.
// NOTE(review): myColumn would still be null here unless the unhandled-type
// path returns early -- that return is not visible in this excerpt; confirm.
1951 knownColumns.push_back (myColumn);
1952 if (!myColumn->connect (
tree, offsetColumns, requestedColumns))
1954 ANA_MSG_WARNING (
"failed to connect dynamic output column: " << info.name);
1957 usedColumns.push_back (myColumn);
// Anything still unclaimed at this point is a hard error: a message starting
// with "columns not claimed:" is built (the append inside the loop is not
// shown) and thrown as std::runtime_error.
1960 if (!unclaimedColumns.empty())
1962 std::string
message =
"columns not claimed:";
1963 for (
auto&
column : unclaimedColumns)
1965 throw std::runtime_error (
message);
// Make the asg::msgUserCode names available in this scope.
1971 using namespace asg::msgUserCode;
// Error raised for the "tool does not support systematics" case; the
// condition guarding this throw is not part of the visible lines.
1977 throw std::runtime_error (
"tool does not support systematics");
// Announce the variation named by sysName on stdout.
1978 std::cout <<
"applying systematic variation: " <<
sysName << std::endl;
// Failing to apply the variation is fatal; the check that leads to this
// throw is on a line not shown in this excerpt.
1980 throw std::runtime_error (
"failed to apply systematic variation: " +
sysName);
// Container renames are only dealt with when some were supplied; the
// handling itself is on lines not shown.
1985 if (!containerRenames.empty())
// Run the two column setup helpers before any benchmark object is created.
1990 setupKnownColumns ();
1991 setupColumns (toolWrapper);
// Three timers: `benchmark` and `benchmarkCheck` are constructed with
// batchSize, `benchmarkEmpty` with the default.  The empty timer is started
// and stopped back-to-back inside the loop below.
1993 Benchmark benchmark (
name, batchSize);
1994 Benchmark benchmarkCheck (
name +
"(column check)", batchSize);
1995 Benchmark benchmarkEmpty (
"empty");
// Pointer to the offset column registered under `container`.  It stays null
// when `container` is empty; a non-empty name without an entry in
// offsetColumns is reported as an error.
1997 const std::vector<ColumnarOffsetType>* offsetColumn =
nullptr;
1998 if (!container.empty())
2000 auto iter = offsetColumns.find (container);
2001 if (
iter == offsetColumns.end())
2002 throw std::runtime_error (
"missing size column: " + container);
2003 offsetColumn =
iter->second;
2006 const auto numberOfEvents =
tree->GetEntries();
// The loop has no bound of its own: it runs until endLoop becomes true.
// Where endLoop is set is not visible in this excerpt.
2010 bool endLoop =
false;
2011 for (; !endLoop; ++
entry)
// Start/stop with nothing in between.
2015 benchmarkEmpty.startTimer ();
2016 benchmarkEmpty.stopTimer ();
// Per-entry pass over the used columns (loop body not shown).
2020 for (
auto&
column : usedColumns)
// On the last entry of the tree, print the average size per event.
// NOTE(review): offsetColumn is dereferenced here but is only assigned when
// `container` is non-empty -- confirm a guard exists on the lines not shown.
2024 if (
entry + 1 == numberOfEvents)
2025 std::cout <<
"average size: " <<
float (
totalSize + offsetColumn->back()) / numberOfEvents << std::endl;
// Every batchSize entries: hand the column data to toolColumnData, then time
// the column check and the tool call separately.  The timed statements
// themselves sit between each start/stop pair on lines not shown.
2027 if ((
entry + 1) % batchSize == 0)
2031 for (
auto&
column : usedColumns)
2032 column->setData (toolColumnData);
2033 benchmarkCheck.startTimer ();
2035 benchmarkCheck.stopTimer ();
2036 benchmark.startTimer ();
2038 benchmark.stopTimer ();
// Second pass over the used columns after the timed call (body not shown).
2039 for (
auto&
column : usedColumns)
// Report the number of entries in the tree and the final value of `entry`.
2045 std::cout <<
"Entries in file: " << numberOfEvents << std::endl;
2046 std::cout <<
"Total entries read: " <<
entry << std::endl;
// Per-entry time of the empty benchmark, queried with 0 as its own baseline.
// getEntryTime returns std::optional<float>; .value() throws
// std::bad_optional_access if no value is available.
2047 const float emptyTime = benchmarkEmpty.getEntryTime(0).value();
2048 std::cout <<
"Empty benchmark time: " << emptyTime <<
"ns" << std::endl;
// Presumably suppresses the benchmark's own report, since the value has
// already been printed above -- TODO confirm against Benchmark::setSilence.
2049 benchmarkEmpty.setSilence();
// One BranchPerfData per used column, plus a running sum in `summary`.
// `summary` is declared on a line not shown; its optionals must already hold
// values because .value() is used as the accumulation target.  Fields a
// column does not report count as 0.
2051 std::vector<TestUtils::BranchPerfData> branchPerfData;
2053 for (
auto&
column : usedColumns)
2055 branchPerfData.push_back (
column->getPerfData (emptyTime));
2056 summary.timeRead.value() += branchPerfData.back().timeRead.value_or(0);
2057 summary.timeUnpack.value() += branchPerfData.back().timeUnpack.value_or(0);
2058 summary.entrySize.value() += branchPerfData.back().entrySize.value_or(0);
2059 summary.uncompressedSize.value() += branchPerfData.back().uncompressedSize.value_or(0);
2060 summary.numBaskets.value() += branchPerfData.back().numBaskets.value_or(0);
// Sort the per-branch rows by name first and append the summary afterwards,
// so the summary row stays last.
2062 std::sort (branchPerfData.begin(), branchPerfData.end(), [] (
const auto&
a,
const auto&
b) {return a.name < b.name;});
2063 branchPerfData.insert (branchPerfData.end(),
summary);
// Width of the first table column: the longest name among all rows.
2064 const std::size_t nameWidth = std::max_element (branchPerfData.begin(), branchPerfData.end(), [] (
const auto&
a,
const auto&
b) {return a.name.size() < b.name.size();})->name.size();
// "{:{}}" pads the "branch name" label to nameWidth; the dashed rule below
// matches the header length.
2065 std::string
header =
std::format (
"{:{}} | read(ns) | unpack(ns) | size(B) | rate(MB/s) | compression | baskets",
"branch name", nameWidth);
2066 std::cout <<
"\n" <<
header << std::endl;
2067 std::cout << std::string (
header.size(),
'-') << std::endl;
// One table row per entry.  Each cell is printed only when the corresponding
// optional holds a value; several of the print statements (and the
// alternative for missing values) are on lines not shown.
2068 for (
auto&
data : branchPerfData)
// A second rule is printed before the row named "total" (presumably the
// summary row -- confirm where summary.name is set).
2070 if (
data.name ==
"total")
2071 std::cout << std::string (
header.size(),
'-') << std::endl;
2077 if (
data.timeUnpack)
// Rate cell: entry size divided by read time, with the 1e-3 factor turning
// bytes-per-ns into the MB/s of the header.  The 1.024 * 1.024 divisor
// presumably converts to binary megabytes -- TODO confirm intent.
2085 if (
data.timeRead &&
data.entrySize)
2086 std::cout <<
std::format (
"{:>11.1f} |", (
data.entrySize.value() / (
data.timeRead.value() * 1
e-3 * 1.024 * 1.024)));
// Compression cell: uncompressed size divided by entry size.
2089 if (
data.entrySize &&
data.uncompressedSize)
2090 std::cout <<
std::format (
"{:>12.2f} |",
float (
data.uncompressedSize.value()) /
data.entrySize.value());
2093 if (
data.numBaskets)
2095 std::cout << std::endl;
// Tool-level summary table.  A single row is created for this tool, with the
// call and column-check times obtained from the two benchmarks using
// emptyTime as the baseline argument.
2099 std::vector<TestUtils::ToolPerfData> toolPerfData;
2100 toolPerfData.emplace_back ();
2101 toolPerfData.back().name =
name;
2102 toolPerfData.back().timeCall = benchmark.getEntryTime(emptyTime);
2103 toolPerfData.back().timeCheck = benchmarkCheck.getEntryTime(emptyTime);
// Presumably suppresses the benchmarks' own reports, since their values are
// printed in the table below -- TODO confirm against Benchmark::setSilence.
2104 benchmark.setSilence();
2105 benchmarkCheck.setSilence();
// Width of the first column: the longest tool name (one row here, so this is
// the length of `name`).
2106 const std::size_t nameWidth = std::max_element (toolPerfData.begin(), toolPerfData.end(), [] (
const auto&
a,
const auto&
b) {return a.name.size() < b.name.size();})->name.size();
// "{:{}}" pads the "tool name" label to nameWidth; the dashed rule below
// matches the header length.
2107 std::string
header =
std::format (
"{:{}} | call(ns) | check(ns)",
"tool name", nameWidth);
2108 std::cout <<
"\n" <<
header << std::endl;
2109 std::cout << std::string (
header.size(),
'-') << std::endl;
// One row per tool; the cell-printing statements are on lines not shown.
2110 for (
auto&
data : toolPerfData)
2119 std::cout << std::endl;
2125 #ifdef XAOD_STANDALONE
2133 #ifdef XAOD_STANDALONE
2134 Benchmark benchmarkEmptyClear (
name +
" empty clear");
2135 Benchmark benchmarkCallClear (
name +
" call clear");
2136 Benchmark benchmarkPrepClear (
name +
" prep clear");
2138 Benchmark benchmarkCall (
name +
" call");
2139 Benchmark benchmarkCallCopyRecord (
name +
" call copy-record");
2140 Benchmark benchmarkCallRetrieve (
name +
" call retrieve");
2141 Benchmark benchmarkPrep (
name +
" prep");
2142 Benchmark benchmarkPrepCopyRecord (
name +
" prep copy-record");
2143 Benchmark benchmarkPrepRetrieve (
name +
" prep retrieve");
2144 Benchmark benchmarkGetEntry (
name +
" getEntry");
2146 const auto numberOfEvents =
event.getEntries();
2147 #ifdef XAOD_STANDALONE
2148 std::cout <<
"known container keys:" << std::endl;
2149 for (
auto& [container,
key] : columnar::TestUtils::knownKeys)
2151 std::cout <<
std::format (
" {} -> 0x{:x}, 0x{:x} -> {}", container,
event.getHash (container),
key,
event.getName (
key)) << std::endl;
2154 if (numberOfEvents == 0){
2155 throw std::runtime_error (
"ColumnarPhysLiteTest: numberOfEvents == 0");
2167 benchmarkGetEntry.startTimer ();
2168 event.getEntry (
entry % numberOfEvents);
2169 benchmarkGetEntry.stopTimer ();
2170 benchmarkPrepRetrieve.startTimer ();
2172 benchmarkPrepRetrieve.stopTimer ();
2173 benchmarkPrepCopyRecord.startTimer ();
2174 static const std::string prepPostfix =
"Prep";
2176 benchmarkPrepCopyRecord.stopTimer ();
2177 benchmarkPrep.startTimer ();
2179 benchmarkPrep.stopTimer ();
2180 #ifdef XAOD_STANDALONE
2181 benchmarkPrepClear.startTimer ();
2183 benchmarkPrepClear.stopTimer ();
2185 benchmarkCallRetrieve.startTimer ();
2187 benchmarkCallRetrieve.stopTimer ();
2188 benchmarkCallCopyRecord.startTimer ();
2189 static const std::string callPostfix =
"Call";
2191 benchmarkCallCopyRecord.stopTimer ();
2192 benchmarkCall.startTimer ();
2194 benchmarkCall.stopTimer ();
2195 #ifdef XAOD_STANDALONE
2196 benchmarkCallClear.startTimer ();
2198 benchmarkCallClear.stopTimer ();
2199 benchmarkEmptyClear.startTimer ();
2201 benchmarkEmptyClear.stopTimer ();
2204 std::cout <<
"Total entries read: " <<
entry << std::endl;