24 #include "boost/io/ios_state.hpp"
29 m_meas_post_ini(true),
30 m_meas_post_1stevt(true),
31 m_meas_post_lastevt(true),
32 m_meas_post_fin(true),
33 m_last_stepidx(INT_MAX),
34 m_last_nevts(UINT_MAX),
36 m_need_finalise(true),
39 m_vmem_sum_evtloop(0),
42 m_stepprefix(
"step "),
45 m_malloc_status(
"ok"),
46 m_malloc_might_get_slow(false),
47 m_overhead_cpu_tmp(0),
48 m_overhead_cpu_total(0),
50 m_overhead_cpu_snapshot_post_1stevt(-1),
51 m_overhead_cpu_snapshot_post_lastevt(-1)
56 {
delete[]
new char[11];
delete[]
new char[117];
delete[]
new char[17];}
59 if (!jobStartJiffies.empty()) {
69 double m0=get_malloc_kb();
char *
volatile c =
new char[1024];
double m1=get_malloc_kb();
delete[]
c;
73 std::cout<<
m_prefix<<
"WARNING Can't retrieve Malloc information in this setup.";
75 std::cout<<
" This is likely because mallinfo is in a different lib than malloc.";
78 #if __GLIBC__<=2 && __GLIBC_MINOR__<=5
86 #if DEBUG_SemiDetHelper
95 #if DEBUG_SemiDetHelper
96 m_debug_meas.capture(
false);
97 m_debug_data.addPointStart(m_debug_meas);
111 m_nevts=
std::max(m_nevts,val_nevts);
112 if (m_last_stepidx!=stepidx||m_last_nevts!=val_nevts)
113 specialAud(stepidx,val_nevts);
117 #if DEBUG_SemiDetHelper
118 m_debug_meas.capture(
true);
119 m_debug_data.addPointStop(m_debug_meas);
125 #if DEBUG_SemiDetHelper
126 m_debug_meas.capture(
false);
127 m_debug_data.addPointStart(m_debug_meas);
130 m_meas.capture(
true);
131 m_overhead_cpu_tmp=m_meas.last_cpu_raw_ms();
140 stopAudStd(
compName,stepidx,val_nevts);
143 m_meas.incrementOffsets(0,0,dcpu);
144 m_overhead_cpu_total+=dcpu; ++m_overhead_cpu_n;
146 #if DEBUG_SemiDetHelper
147 m_debug_meas.capture(
true);
148 m_debug_data.addPointStop(m_debug_meas);
156 return &(mapLookup(
compName,m_data_std,m_data_std_lastlookup));
172 m_meas_post_1stevt.captureIfUnused();
173 if (m_overhead_cpu_snapshot_post_1stevt<0)
174 m_overhead_cpu_snapshot_post_1stevt=m_overhead_cpu_total;
175 m_meas.capture(
false);
179 [[maybe_unused]]
static const bool first = [&]() {
183 { std::string
dummy(
"libstringspy_trigger_report"); }
184 {
delete[]
new char[13];
delete[]
new char[117];
delete[]
new char[17]; }
187 m_meas_post_lastevt.captureIfUnused();
188 if (m_overhead_cpu_snapshot_post_lastevt<0)
189 m_overhead_cpu_snapshot_post_lastevt=m_overhead_cpu_total;
191 m_meas_post_ini.captureIfUnused();
193 data=&(
d->data[stepidx]);
195 startAudFinishUp(
data);
199 m_meas.capture(
false);
200 double dcpu=m_meas.last_cpu_raw_ms()-m_overhead_cpu_tmp;
202 m_meas.incrementOffsets(0,0,dcpu);
203 m_overhead_cpu_total+=dcpu;
205 data->addPointStart(m_meas);
212 d->data_evt.addPointStop(m_meas,val_nevts);
214 d->data[stepidx].addPointStop(m_meas);
224 if (m_overhead_cpu_n>1000&&m_overhead_cpu_total>0.2*m_overhead_cpu_n) {
227 m_meas.disableMalloc();
230 else m_malloc_status=
"autooff";
231 m_malloc_might_get_slow=
false;
232 std::cout<<m_prefix<<
"WARNING Malloc information gathering becoming slow. Disabling."<<std::endl;
238 if (m_last_stepidx!=stepidx) {
242 m_meas_post_ini.captureIfUnused();
245 m_meas_post_1stevt.captureIfUnused();
248 m_meas_post_1stevt.captureIfUnused();
249 m_meas_post_lastevt.captureIfUnused();
250 if (m_malloc_might_get_slow)
261 m_vmem_sum_evtloop+=vmem;
262 m_rss_sum_evtloop+=rss;
264 double malloc(get_malloc_kb());
265 if (val_nevts<=100) {
266 m_fit_11to100_vmem.addPoint(val_nevts,vmem);
267 m_fit_11to100_malloc.addPoint(val_nevts,
malloc);
269 m_fit_101plus_vmem.addPoint(val_nevts,vmem);
270 m_fit_101plus_malloc.addPoint(val_nevts,
malloc);
273 m_last_nevts=val_nevts;
274 if (m_malloc_might_get_slow)
277 m_last_stepidx=stepidx;
283 m_tmp_otherkey.second=stepName;
284 CompDataBasic&
data=mapLookup(m_tmp_otherkey,m_data_other,m_data_other_lastlookup);
285 startAudFinishUp(&
data);
291 m_tmp_otherkey.second=stepName;
292 CompDataBasic&
data=mapLookup(m_tmp_otherkey,m_data_other,m_data_other_lastlookup);
294 data.addPointStop(m_meas);
310 if (!m_need_finalise)
312 m_need_finalise=
false;
313 m_meas_post_fin.captureIfUnused();
318 const std::string& info_full_output_inside)
323 f.open(
file.c_str());
324 if (!
f.fail()&&
f.is_open()) {
325 actualReport(
f,
true,info_full_output_inside);
331 std::cout<<m_prefix<<
"WARNING Could not write output to file: "<<
file<<std::endl;
338 actualReport(std::cout,showall,info_full_output_inside);
342 const std::string& info_full_output_inside,
343 bool stdout_uncollapsed)
346 std::string actualfile=
file;
347 bool fileok=reportToFile(
file,info_full_output_inside);
349 actualfile+=
" (failed)";
350 reportToStdout(info_full_output_inside,stdout_uncollapsed);
355 const std::string& stepName,
356 const std::string& infoName,
359 double vmem(0),
malloc(0),cpu(0),wall(0);
368 bool special=
m.hasWallTime();
370 os<<m_prefix<<
"["<<stepName<<
"]";
371 os<<
" ";field_i(
os,4,
n);
372 os<<
" ";field_f(
os,
w,cpu,
false);
375 field_f(
os,
w,wall,
false);
377 os<<
" ";field_f(
os,
w,vmem,
false);
384 const std::string& stepName,
387 bool normal_as_extended)
const
390 os<<m_prefix<<
"["<<stepName<<
"]";
403 os<<
" ";field_i(
os,4,
data->nEntries());
404 os<<
" ";field_f(
os,6,
data->meanDeltaCPU(),
false);
405 if (normal_as_extended) {
os <<
" "; }
406 os<<
" ";field_f(
os,7,
data->meanDeltaVMEM(),
false);
407 if (normal_as_extended) {
os <<
" "; }
408 os<<
" ";field_f(
os,7,
data->meanDeltaMalloc(),
false);
409 if (normal_as_extended) {
os <<
" "; }
417 os<<m_prefix<<
"["<<stepName<<
"]";
421 field_f(
os,8,fit_vmem.
slope(),
false);
422 os<<
" ";field_f(
os,8,fit_malloc.
slope(),
false);
429 const unsigned width(80);
433 unsigned nright=nleft;
438 if ((nleft+nright+
str.size())<
width)
442 for (
unsigned i=0;
i<nleft;++
i)
os<<
"=";
444 for (
unsigned i=0;
i<nright;++
i)
os<<
"=";
453 os<<
" n cpu max@evt vmem max@evt malloc max@evt component\n";
455 os<<m_prefix;spaces(
os,(sw?sw->
stepName():m_specialstep).size()+2);
458 os<<
" cpu wall vmem malloc component";
460 os<<
" cpu vmem malloc component";
491 va=
a.data->sortValMemory();
492 vb=
b.data->sortValMemory();
495 va=
a.data->sortValCPU();vb=
b.data->sortValCPU();
502 va=
a.data->sortValMemory();
503 vb=
b.data->sortValMemory();
508 return *(
a.compName)<*(
b.compName);
525 unsigned collapse_limit=6;
526 double showfraction=1.0;
528 if (sw->
stepName()==
"evt") showfraction=0.99;
529 else if (sw->
stepName()==
"dso") showfraction=0.25;
530 else showfraction=0.8;
533 const std::string ignore_name(
"PerfMonSlice");
539 float totals_sortvals[nvals];
544 totals[
ival]=totals_sortvals[
ival]=0.0;
549 bool hasentry(
false);
551 if (*
compName==ignore_name)
continue;
552 if (
data->nEntries()==0)
continue;
573 double sum(0.0),
limit(showfraction*totals_sortvals[
ival]);
577 double v=
it->data->sortVal(
ival);
583 if (!
it->displayed())
584 it->setCollapsed(
it->data->nEntries());
591 std::map<unsigned,Count> collapse_count;
595 if (!
it->displayed())
596 mapLookup(
it->data->nEntries(),collapse_count,collapse_count_cache).val += 1;
599 if (!
it->displayed()&&collapse_count[
it->data->nEntries()].val<collapse_limit)
613 std::map<unsigned,CompDataBasic> collapse_map,totals_map;
617 unsigned n(
it->data->nEntries());
618 CompDataBasic& data_total = mapLookup(
n,totals_map,it_totals_cache);
619 data_total.
add(*(
it->data));
620 if (
it->displayed()) {
624 CompDataBasic& data_collapse = mapLookup(
n,collapse_map,it_collapse_cache);
625 data_collapse.
add(*(
it->data));
630 it_summary=collapse_map.begin();it_summaryE=collapse_map.end();
631 for (;it_summary!=it_summaryE;++it_summary) {
633 s<<
"[collapsed_"<<it_summary->second.nEntries()<<
"_comps]";
634 it_summary->second.setEntries(it_summary->first);
637 it_summary=totals_map.begin();it_summaryE=totals_map.end();
638 for (;it_summary!=it_summaryE;++it_summary) {
640 s<<
"[total_for_"<<it_summary->second.nEntries()<<
"_comps]";
641 it_summary->second.setEntries(it_summary->first);
648 boost::io::ios_base_all_saver ssave (
os);
650 #if DEBUG_SemiDetHelper
657 format(
os,showall?
"semi-detailed perfmon info v1.0f / start"
658 :
"semi-detailed perfmon info v1.0c / start");
659 format(
os,
"Documentation: https://twiki.cern.ch/twiki/bin/viewauth/Atlas/PerfMonSD",
false);
660 format(
os,
"Note that documentation includes recipe for easy parsing from python. ",
false);
661 format(
os,
"Units are milliseconds (cpu/wall) and kilobytes (vmem/malloc). ",
false);
662 if (!info_full_output_inside.empty()) {
663 const unsigned cw(71);
664 std::string
s;
s.reserve(cw);
665 s.clear();
s+=
"Full output inside: ";
s+=info_full_output_inside;
666 while (
s.size()<cw)
s+=
" ";
670 {
StepWrapperStd step_ini(
"ini",&m_data_std);processStep(
os,&step_ini,showall); }
671 {
StepWrapperStd step_1st(
"1st",&m_data_std);processStep(
os,&step_1st,showall); }
672 {
StepWrapperStd step_cbk(
"cbk",&m_data_std);processStep(
os,&step_cbk,showall); }
673 {
StepWrapperStd step_evt(
"evt",&m_data_std);processStep(
os,&step_evt,showall); }
674 {
StepWrapperStd step_fin(
"fin",&m_data_std);processStep(
os,&step_fin,showall); }
677 std::set<std::string> othersteps;
679 for (;it_other!=it_other_E;++it_other)
680 othersteps.insert((*it_other).first.second);
683 std::set<std::string>::const_iterator it_other_stepname(othersteps.begin()),it_other_stepnameE(othersteps.end());
684 for (;it_other_stepname!=it_other_stepnameE;++it_other_stepname) {
686 processStep(
os,&
step,showall);
693 m_meas_post_ini.wall-=m_meas_pre_ini.wall;
694 m_meas_post_1stevt.wall-=m_meas_pre_ini.wall;
695 m_meas_post_lastevt.wall-=m_meas_pre_ini.wall;
696 m_meas_post_fin.wall-=m_meas_pre_ini.wall;
697 m_meas_pre_ini.wall=0;
698 m_meas_post_ini.cpu-=m_meas_pre_ini.cpu;
699 m_meas_post_1stevt.cpu-=m_meas_pre_ini.cpu;
700 m_meas_post_lastevt.cpu-=m_meas_pre_ini.cpu;
701 m_meas_post_fin.cpu-=m_meas_pre_ini.cpu;
702 m_meas_pre_ini.cpu=0;
704 unsigned evtloop_n(0), overhead_n(0);
705 double evtloop_cpu(0.),evtloop_wall(0.), overhead_cpu(0.), overhead_wall(0.);
706 double internalcpu_evtloop(0),internalcpu_onceperjob(0);
709 if (m_nevts>1 && !m_meas_post_lastevt.unused() && !m_meas_post_1stevt.unused()) {
710 evtloop_cpu = (m_meas_post_lastevt.cpu -m_meas_post_1stevt.cpu )/(m_nevts-1);
711 evtloop_wall = (m_meas_post_lastevt.wall-m_meas_post_1stevt.wall)/(m_nevts-1);
713 internalcpu_evtloop=(m_overhead_cpu_snapshot_post_lastevt-m_overhead_cpu_snapshot_post_1stevt)/(m_nevts-1);
714 if (!m_meas_pre_ini.unused()&&!m_meas_post_fin.unused()) {
715 overhead_cpu = m_meas_post_fin.cpu -m_meas_pre_ini.cpu-evtloop_n*evtloop_cpu;
716 overhead_wall = m_meas_post_fin.wall-m_meas_pre_ini.wall-evtloop_n*evtloop_wall;
718 internalcpu_onceperjob=m_overhead_cpu_total-evtloop_n*internalcpu_evtloop;
722 os<<m_prefix<<
"[---] ";field_i(
os,4,evtloop_n);
os<<
" ";field_f(
os,8,evtloop_cpu);
723 os<<
" ";field_f(
os,8,evtloop_wall);
os<<
" - - evtloop_time\n";
724 os<<m_prefix<<
"[---] ";field_i(
os,4,overhead_n);
os<<
" ";field_f(
os,8,overhead_cpu);
725 os<<
" ";field_f(
os,8,overhead_wall);
os<<
" - - overhead_time\n";
726 const std::string
s=m_specialstep;
727 format(
os,
s,
"snapshot_pre_ini",m_meas_pre_ini);
728 format(
os,
s,
"snapshot_post_ini",m_meas_post_ini);
729 format(
os,
s,
"snapshot_post_1stevt",m_meas_post_1stevt);
730 format(
os,
s,
"snapshot_post_lastevt",m_meas_post_lastevt);
731 format(
os,
s,
"snapshot_post_fin",m_meas_post_fin);
732 format(
os,
s,
"leakperevt_evt11to100",m_fit_11to100_vmem,m_fit_11to100_malloc);
733 format(
os,
s,
"leakperevt_evt101plus",m_fit_101plus_vmem,m_fit_101plus_malloc);
734 os<<m_prefix<<
"["<<
s<<
"] vmem_peak="<<
int(m_vmpeak+0.5)
735 <<
" vmem_mean="<<(m_nevts_sum?
int(m_vmem_sum_evtloop/m_nevts_sum+.5):0)
736 <<
" rss_mean="<< (m_nevts_sum?
int( m_rss_sum_evtloop/m_nevts_sum+.5):0)<<
"\n";
737 os<<m_prefix<<
"["<<
s<<
"] jobcfg_walltime="<<
int(m_cfg_walltime+0.5)<<
" jobstart="<<m_starttime<<
"\n";
740 <<
" release="<<
envvar(
"AtlasVersion",
"unknown_release")<<
"/"<<
envvar(
"CMTCONFIG",
"unknown_cmtconfig")<<
"\n";
741 os<<m_prefix<<
"["<<
s<<
"] cpu_model="<<
cpu_model()<<
"\n";
742 os<<m_prefix<<
"["<<
s<<
"] malloc="<<m_malloclibname<<
"/"<<m_malloc_status
743 <<
" pycintex_vmemfix="<<
envvar(
"ATHENA_PYCINTEX_MINVMEM",
"0")<<
"\n";
744 os<<m_prefix<<
"["<<
s<<
"] pmonsd_cost_onceperjob="<<
int(0.5+internalcpu_onceperjob)
745 <<
" pmonsd_cost_perevt="<<
int(0.5+internalcpu_evtloop)<<
"\n";
747 #if DEBUG_SemiDetHelper
750 os<<m_prefix<<
"Own resource consumption ["<<m_debug_data.nEntries()
751 <<
" calls]: <cpu> [ms]="<<m_debug_data.meanDeltaCPU()
752 <<
", total memory usage[kb]~="<<m_debug_data.meanDeltaMalloc()*m_debug_data.nEntries()
753 <<
", total memory usage vmem [kb]~="<<m_debug_data.meanDeltaVMEM()*m_debug_data.nEntries()
754 <<
", #map lookups="<<m_debug_nfinds
755 <<
", #map inserts="<<m_debug_ninserts
758 format(
os,
"semi-detailed perfmon info / end");