ATLAS Offline Software
HephProf.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
3 */
4 
5 #include <fstream>
6 #include <iostream>
7 #include <iomanip>
8 #include <map>
9 #include <string>
10 #include <utility>
11 
12 #include <stddef.h>
13 #include <getopt.h>
14 #include <glob.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <unistd.h>
18 
19 #include <sys/stat.h>
20 
21 #include <unordered_map>
22 #include "boost/io/ios_state.hpp"
23 
24 
25 //- the following function will be called on each error on input
// Print the command-line synopsis for the program named `prog` to stdout.
// Always returns -1, which main() passes on as its exit status.
int Usage( const char* prog ) {
   const char* const option_help =
      " -i <input_name> : file pattern to use for input";

   std::cout << "usage: " << prog << '\n' << option_help << std::endl;
   return -1;
}
32 //-
33 
34 //- helper classes
class CallPoint;

// Code address as stored in the symbol and profile files.
typedef unsigned long Address_t;
// Lookup table from address to symbol name.
typedef std::unordered_map< Address_t, std::string > Symbols_t;
// Key of a call-tree node: the tree-building code puts the address of the
// calling frame in .first and the address of the called function in .second.
typedef std::pair< Address_t, Address_t > CallInfo_t;
// One level of the call tree, keyed by call info.
typedef std::map< CallInfo_t, CallPoint > CallTree_t;

// One node of the call tree together with its accumulated counters.
class CallPoint {
public:
   // All counters start at zero: nodes are created implicitly through
   // CallTree_t::operator[] and are only ever updated with += afterwards.
   CallPoint() : m_allocated( 0 ), m_news( 0 ), m_inclusive( 0 ), m_called( 0 ) {
   }

   CallTree_t m_callees;             // nodes reached from this one

   unsigned long long m_allocated;   // size accounted directly to this node
   unsigned long long m_news;        // number of allocations accounted to this node

   unsigned long long m_inclusive;   // filled in by CalcSizeAndCalls
   unsigned long long m_called;      // number of recorded calls of this node
};
55 
56 struct Record {
57  long size;
59 };
60 //-
61 
62 //- data
65 unsigned long long gTotalSize1 = 0;
66 unsigned long long gTotalSize2 = 0;
67 unsigned long long gTotalIncl = 0;
68 unsigned long long gTotalCalls = 0;
69 
70 static std::string gUnknown = "<unknown>";
71 static Address_t gMalloc = 0xCDCDCDCD;
72 //-
73 
74 inline std::string getSymbol( Address_t address ) {
75  Symbols_t::iterator isym = gSymbols.find( address );
76 
77  if ( isym != gSymbols.end() )
78  return isym->second;
79 
80  return gUnknown;
81 }
82 
83 inline bool isKnownSymbol( const CallInfo_t& call_info ) {
84  return gSymbols.find( call_info.second ) != gSymbols.end();
85 }
86 
87 inline bool isPseudoAllocator( const CallInfo_t& call_info ) {
88  return gPseudoAllocators.find( call_info.second ) != gPseudoAllocators.end();
89 }
90 
91 inline bool isAllocator( const CallInfo_t& call_info ) {
92  return gAllocators.find( call_info.second ) != gAllocators.end();
93 }
94 
95 inline void WeaveCallPoints( CallPoint& cp1, const CallPoint& cp2 ) {
96  cp1.m_allocated += cp2.m_allocated;
97  cp1.m_called += cp2.m_called;
98  cp1.m_inclusive += cp2.m_inclusive;
99  cp1.m_news += cp2.m_news;
100 
101  for ( CallTree_t::const_iterator icp2 = cp2.m_callees.begin();
102  icp2 != cp2.m_callees.end(); ++icp2 ) {
103 
104  CallTree_t::iterator icp1 = cp1.m_callees.find( icp2->first );
105  if ( icp1 != cp1.m_callees.end() )
106  WeaveCallPoints( icp1->second, icp2->second );
107  else
108  cp1.m_callees[ icp2->first ] = icp2->second;
109  }
110 }
111 
112 inline bool LocateAndWeave(
113  CallPoint& v1, const CallInfo_t& call_info, const CallPoint& callpoint ) {
114 
115  bool found = false;
116 
117  CallTree_t::iterator icpv1 = v1.m_callees.find( call_info );
118  if ( icpv1 != v1.m_callees.end() ) {
119  WeaveCallPoints( icpv1->second, callpoint );
120  found = true;
121 
122  } else {
123  for ( CallTree_t::iterator icp = v1.m_callees.begin();
124  icp != v1.m_callees.end(); ++icp ) {
125  found = LocateAndWeave( icp->second, call_info, callpoint );
126  if ( found )
127  break;
128  }
129  }
130 
131  return found;
132 }
133 
134 unsigned long long CalcSizeAndCalls( const CallInfo_t& call_info, CallPoint& callpoint ) {
135  unsigned long long size = 0;
136 
137  if ( isAllocator( call_info ) || isPseudoAllocator( call_info ) ) {
138 
139  size = 0;
140  for ( CallTree_t::iterator icp = callpoint.m_callees.begin();
141  icp != callpoint.m_callees.end(); ++icp ) {
142  size += CalcSizeAndCalls( icp->first, icp->second );
143  }
144 
145  callpoint.m_inclusive = callpoint.m_allocated;
146 
147  } else {
148 
149  size = callpoint.m_allocated;
150  for ( CallTree_t::iterator icp = callpoint.m_callees.begin();
151  icp != callpoint.m_callees.end(); ++icp ) {
152  size += CalcSizeAndCalls( icp->first, icp->second );
153  }
154 
155  callpoint.m_inclusive = size;
156 
157  }
158 
159  return size;
160 }
161 
162 void WriteCallPoint( std::ofstream& f,
163  const CallInfo_t& call_info, const CallPoint& callpoint ) {
164 
165  const std::string& call_name = getSymbol( call_info.second );
166 
167  f << "fl=athena.cxx\n";
168  f << "fn=" << call_name << '\n';
169  f << call_info.first << ' ' << callpoint.m_allocated
170  << ' ' << callpoint.m_news << '\n';
171 
172  if ( isAllocator( call_info ) )
173  gTotalSize2 += callpoint.m_allocated;
174  else if ( ! isPseudoAllocator( call_info ) )
175  gTotalSize1 += callpoint.m_allocated;
176 
177  for ( CallTree_t::const_iterator icp = callpoint.m_callees.begin();
178  icp != callpoint.m_callees.end(); ++icp ) {
179  f << "cfn=" << getSymbol( icp->first.second ) << '\n';
180  f << "calls=" << icp->second.m_called << ' ' << icp->first.second << '\n';
181  f << icp->first.first << ' ' << icp->second.m_inclusive
182  << ' ' << icp->second.m_called << '\n';
183  }
184  f << '\n';
185 
186  for ( CallTree_t::const_iterator icp = callpoint.m_callees.begin();
187  icp != callpoint.m_callees.end(); ++icp ) {
188  WriteCallPoint( f, icp->first, icp->second );
189  }
190 
191 }
192 
193 void ReadSymbols( const std::string& input ) {
194 // read symbol file
195  std::cout << "reading symbols from " << input << " ... " << std::endl;
196  FILE* fsyms = popen( ("gzip -dc " + input).c_str(), "r" );
197 
198  Address_t l;
199  while ( fread( (void*)&l, sizeof(l), 1, fsyms ) == 1) {
200  char s[4096];
201  size_t off = 0;
202  while (true) {
203  char c = 0;
204  int stat = fread( &c, sizeof(char), 1, fsyms );
205  if (stat <= 0 || c == '\n') break;
206  if (off < sizeof(s)-1)
207  s[off++] = c;
208  }
209  s[off] = '\0';
210 
211  gSymbols[ l ] = s;
212 
213  if ( strcmp( s, "operator new(unsigned int)" ) == 0 )
214  gAllocators[ l ] = s;
215  else if ( strcmp( s, "operator new(unsigned int, std::nothrow_t const&)" ) == 0 )
216  gAllocators[ l ] = s;
217 
218  else if ( strcmp( s, "operator new[](unsigned int)" ) == 0 )
219  gPseudoAllocators[ l ] = s;
220 
221  else if ( strncmp( s, "std::", 5 ) == 0 ||
222  strncmp( s, "__gnu_cxx", 9 ) == 0 ||
223  strstr( s, "_S_construct" ) != 0 ||
224  strstr( s, "_M_allocate_and_copy" ) != 0 ) {
225  gPseudoAllocators[ l ] = s;
226  }
227 
228  }
229 
230 // add explicitly by hand as it won't be on file
231  gAllocators[ gMalloc ] = "malloc";
232  gSymbols[ gMalloc ] = "malloc";
233 
234  pclose( fsyms );
235 }
236 
237 void ProcessProfile( const std::string& input, const std::string& output ) {
238 
239  boost::io::ios_base_all_saver coutsave (std::cout);
240  struct stat statbuf;
241  if ( stat( input.c_str(), &statbuf ) != 0 ) {
242  std::cout << "failed to stat " << input << " ... exiting ... " << std::endl;
243  return;
244  }
245 
246  double inv_total_bytes = 1. / static_cast<double> (statbuf.st_size);
247  {
248  unsigned long long byte_counter = 0, stack_counter = 0, total_size = 0;
249 
250  // read data file
251  std::cout << "reading traces from " << input << " ... " << std::endl;
252  FILE* fprof = popen( ("gzip -dc " + input).c_str(), "r" );
253 
254  long size = 0, nstack = 0, step_progress = 0, progress = 0;
255  const int maxstack = 128;
256  Address_t stacktrace[ maxstack ];
257 
258  std::cout.setf( std::ios::fixed, std::ios::floatfield );
259  std::cout << std::setprecision( 2 );
260 
261  std::cout << " progress: ["; std::cout.flush();
262  while ( fread( (void*)&size, sizeof(long), 1, fprof ) ) {
263  if (fread( (void*)&nstack, sizeof(long), 1, fprof ) < 1) break;
264  int nskip = 0;
265  if (nstack < 0)
266  nstack = 0;
267  else if (nstack > maxstack) {
268  nskip = nstack - maxstack;
269  nstack = maxstack;
270  }
271  if ((long)fread( (void*)stacktrace, sizeof(Address_t), nstack, fprof ) < nstack) break;
272  while (nskip > 0) {
273  Address_t dum;
274  if ((long)fread( &dum, sizeof(Address_t), 1, fprof ) < 1) break;
275  --nskip;
276  }
277 
278  byte_counter += (2+nstack)*4;
279  progress = long(2.5*byte_counter * inv_total_bytes); // estimate inflator at 40x
280  if ( progress > 100 ) progress = 100;
281  if ( step_progress < progress ) {
282  for ( ; step_progress <= progress; ++step_progress ) {
283  if ( ! (step_progress % 20) ) {
284  std::cout << step_progress << "%"; std::cout.flush();
285  } else if ( ! (step_progress % 5) ) {
286  std::cout << '.'; std::cout.flush();
287  }
288  }
289  }
290 
291  stack_counter += 1;
292  total_size += size;
293 
294  CallInfo_t caller_info( 0, stacktrace[nstack-1] );
295  CallPoint* caller = &gCallTree[ caller_info ];
296  caller->m_called += 1;
297 
298  CallInfo_t callee_info;
299 
300  for ( int i = nstack-1; i > 0; --i ) {
301  // a caller is the point one up in the stack, and it calls the function at
302  // the current level; the call point one up need not always call the same
303  // function, b/c of the use of virtual function calls or function pointers,
304  // and so it alone does not fully specify a point in the call graph
305 
306  callee_info = CallInfo_t( stacktrace[i], stacktrace[i-1] );
307 
308  // if caller is pseudo-allocator and calling same, then skip immediately to allocator
309  if ( isPseudoAllocator( caller_info ) ) {
310  // note that this skips over things like objects that are allocated in a default
311  // constructor of another object that is called when filling an STL container (the
312  // full blame goes to the allocator of the container)
313  callee_info = CallInfo_t( stacktrace[i], stacktrace[0] );
314 
315  if ( ! isKnownSymbol( callee_info ) ) {
316  // happens e.g. if an fstream file buffer is going through a system call; wire
317  // it directly onto malloc (does not particularly matter for the user, as the
318  // real blame is already upstream, but it does for the cross-check accounting)
319  callee_info.second = gMalloc;
320  }
321 
322  }
323 
324  CallPoint* callee = &caller->m_callees[ callee_info ];
325  callee->m_called += 1;
326 
327  // if calling pseudo- or real allocator, account memory to caller
328  if ( isPseudoAllocator( callee_info ) ) {
329  caller->m_allocated += size;
330  caller->m_news += 1;
331 
332  } else if ( isAllocator( callee_info ) ) {
333  caller->m_allocated += size;
334  caller->m_news += 1;
335 
336  // to make sure that kcachegrind will actually show the allocator
337  callee->m_allocated += size;
338  callee->m_news += 1;
339 
340  break;
341 
342  } else if ( i == 1 ) {
343  // this happens if malloc is called directly (i.e. operator new is
344  // not the bottom of the stack trace), so add it explicitly
345  CallInfo_t minfo( stacktrace[i-1], gMalloc );
346  CallPoint* mpoint = &callee->m_callees[ minfo ];
347 
348  callee->m_allocated += size;
349  callee->m_news += 1;
350 
351  mpoint->m_allocated += size;
352  mpoint->m_news += 1;
353  mpoint->m_called += 1;
354 
355  break;
356  }
357 
358  caller = callee;
359  caller_info = callee_info;
360  }
361 
362  }
363 
364  pclose( fprof );
365 
366  for ( ; step_progress <= 100; ++step_progress ) {
367  if ( ! (step_progress % 20) ) {
368  std::cout << step_progress << "%"; std::cout.flush();
369  } else if ( ! (step_progress % 5) ) {
370  std::cout << '.'; std::cout.flush();
371  }
372  }
373  std::cout << "]" << std::endl;
374  std::cout << "number of stacks: " << stack_counter << std::endl;
375  std::cout << "total alloc size: " << total_size << std::endl;
376  }
377 
378  {
379  // calculate inclusive numbers
380  std::cout << "calculating sizes ... " << std::endl;
381  for ( CallTree_t::iterator icp = gCallTree.begin(); icp != gCallTree.end(); ++icp ) {
382  CalcSizeAndCalls( icp->first, icp->second );
383  }
384 
385  }
386 
387  {
388  // create kcachegrind file
389  std::cout << "writing output to " << output << " ... (" << gCallTree.size() << " top level entries)" << std::endl;
390 
391  std::ofstream fresult( output.c_str() );
392  fresult << "events: memsize nallocs\n\n";
393 
394  fresult << "fl=athena.cxx\nfn=athena\n0 0\n";
395  for ( CallTree_t::iterator icp = gCallTree.begin(); icp != gCallTree.end(); ++icp ) {
396  fresult << "cfn=" << getSymbol( icp->first.second ) << '\n';
397  fresult << "calls=" << icp->second.m_called << ' ' << icp->first.second << '\n';
398  fresult << icp->first.first << ' ' << icp->second.m_inclusive
399  << ' ' << icp->second.m_called << '\n';
400  }
401  fresult << '\n';
402 
403  for ( CallTree_t::iterator icp = gCallTree.begin(); icp != gCallTree.end(); ++icp ) {
404  gTotalIncl += icp->second.m_inclusive;
405  gTotalCalls += icp->second.m_called;
406  WriteCallPoint( fresult, icp->first, icp->second );
407  }
408 
409  fresult.close();
410 
411  std::cout << "cross-check all calls: " << gTotalCalls << std::endl;
412  std::cout << "cross-check inclusive: " << gTotalIncl << std::endl;
413  std::cout << "cross-check-size (non-allocator): " << gTotalSize1 << std::endl;
414  std::cout << "cross-check-size (allocator): " << gTotalSize2 << std::endl;
415 
416  }
417 
418 // done, erase data for next call
419  gCallTree.clear();
420  gTotalSize1 = 0;
421  gTotalSize2 = 0;
422  gTotalIncl = 0;
423  gTotalCalls = 0;
424 }
425 
426 int main( int argc, char* argv [] ) {
427  int opt = -1;
428  std::string input = "hephaestus";
429 
430  while ( (opt = getopt( argc, argv, "i:") ) != -1 ) {
431  switch ( opt ) {
432  case 'i':
433  input = optarg;
434  break;
435  default: /* '?' */
436  return Usage( argv[0] );
437  }
438  }
439 
440  ReadSymbols( input+".symb" );
441 
442  glob_t globbuf;
443  globbuf.gl_offs = 0;
444  glob( (input + "*.prof").c_str(), GLOB_DOOFFS, NULL, &globbuf );
445 
446  std::cout << "files matching: " << globbuf.gl_pathc << std::endl;
447  for ( size_t ig = 0; ig < globbuf.gl_pathc; ++ig ) {
448  std::string filename = globbuf.gl_pathv[ ig ];
449  std::string::size_type firstdot = filename.find('.');
450  std::string evtnumber = filename.substr( firstdot, filename.rfind( '.' )-firstdot );
451  ProcessProfile( filename, filename.substr( 0, firstdot ) + ".cgc" + evtnumber );
452  }
453 
454  globfree( &globbuf );
455 
456  return 0;
457 }
xAOD::iterator
JetConstituentVector::iterator iterator
Definition: JetConstituentVector.cxx:68
DumpGeoConfig.prog
prog
Definition: DumpGeoConfig.py:167
gSymbols
Symbols_t gSymbols
Definition: HephProf.cxx:63
python.CaloRecoConfig.f
f
Definition: CaloRecoConfig.py:127
isAllocator
bool isAllocator(const CallInfo_t &call_info)
Definition: HephProf.cxx:91
python.SystemOfUnits.s
int s
Definition: SystemOfUnits.py:131
WriteCallPoint
void WriteCallPoint(std::ofstream &f, const CallInfo_t &call_info, const CallPoint &callpoint)
Definition: HephProf.cxx:162
CallPoint::m_allocated
unsigned long long m_allocated
Definition: HephProf.cxx:49
CalcSizeAndCalls
unsigned long long CalcSizeAndCalls(const CallInfo_t &call_info, CallPoint &callpoint)
Definition: HephProf.cxx:134
gTotalIncl
unsigned long long gTotalIncl
Definition: HephProf.cxx:67
CallInfo_t
std::pair< Address_t, Address_t > CallInfo_t
Definition: HephProf.cxx:38
UploadAMITag.l
list l
Definition: UploadAMITag.larcaf.py:158
main
int main(int argc, char *argv[])
Definition: HephProf.cxx:426
LArCellConditions.argv
argv
Definition: LArCellConditions.py:112
Usage
int Usage(const char *prog)
Definition: HephProf.cxx:26
Record::size
long size
Definition: HephProf.cxx:57
getSymbol
std::string getSymbol(Address_t address)
Definition: HephProf.cxx:74
python.setupRTTAlg.size
int size
Definition: setupRTTAlg.py:39
isKnownSymbol
bool isKnownSymbol(const CallInfo_t &call_info)
Definition: HephProf.cxx:83
gAllocators
Symbols_t gAllocators
Definition: HephProf.cxx:63
lumiFormat.i
int i
Definition: lumiFormat.py:92
PlotPulseshapeFromCool.input
input
Definition: PlotPulseshapeFromCool.py:106
gPseudoAllocators
Symbols_t gPseudoAllocators
Definition: HephProf.cxx:63
CallPoint::m_inclusive
unsigned long long m_inclusive
Definition: HephProf.cxx:52
CallPoint::m_callees
CallTree_t m_callees
Definition: HephProf.cxx:47
DQHistogramMergeRegExp.argc
argc
Definition: DQHistogramMergeRegExp.py:20
Record
Definition: HephProf.cxx:56
beamspotman.stat
stat
Definition: beamspotman.py:266
CallPoint::m_called
unsigned long long m_called
Definition: HephProf.cxx:53
merge.output
output
Definition: merge.py:17
pmontree.opt
opt
Definition: pmontree.py:16
Symbols_t
std::unordered_map< Address_t, std::string > Symbols_t
Definition: HephProf.cxx:37
gTotalCalls
unsigned long long gTotalCalls
Definition: HephProf.cxx:68
mc.nskip
nskip
Definition: mc.PhPy8EG_Hto4l_NNLOPS_nnlo_30_ggH125_ZZ4l.py:41
ReadSymbols
void ReadSymbols(const std::string &input)
Definition: HephProf.cxx:193
WeaveCallPoints
void WeaveCallPoints(CallPoint &cp1, const CallPoint &cp2)
Definition: HephProf.cxx:95
RTTAlgmain.address
address
Definition: RTTAlgmain.py:55
Address_t
unsigned long Address_t
Definition: HephProf.cxx:35
gCallTree
CallTree_t gCallTree
Definition: HephProf.cxx:64
CondAlgsOpts.found
int found
Definition: CondAlgsOpts.py:101
Record::stacktrace
Address_t stacktrace[9]
Definition: HephProf.cxx:58
CaloCellTimeCorrFiller.filename
filename
Definition: CaloCellTimeCorrFiller.py:24
isPseudoAllocator
bool isPseudoAllocator(const CallInfo_t &call_info)
Definition: HephProf.cxx:87
CallPoint::m_news
unsigned long long m_news
Definition: HephProf.cxx:50
LocateAndWeave
bool LocateAndWeave(CallPoint &v1, const CallInfo_t &call_info, const CallPoint &callpoint)
Definition: HephProf.cxx:112
CallPoint
Definition: HephProf.cxx:41
gTotalSize2
unsigned long long gTotalSize2
Definition: HephProf.cxx:66
CallPoint::CallPoint
CallPoint()
Definition: HephProf.cxx:43
python.compressB64.c
def c
Definition: compressB64.py:93
CallTree_t
std::map< CallInfo_t, CallPoint > CallTree_t
Definition: HephProf.cxx:39
gTotalSize1
unsigned long long gTotalSize1
Definition: HephProf.cxx:65
ProcessProfile
void ProcessProfile(const std::string &input, const std::string &output)
Definition: HephProf.cxx:237