ATLAS Offline Software
Loading...
Searching...
No Matches
HephProf.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
3*/
4
5#include <fstream>
6#include <iostream>
7#include <iomanip>
8#include <map>
9#include <string>
10#include <utility>
11
12#include <stddef.h>
13#include <getopt.h>
14#include <glob.h>
15#include <stdio.h>
16#include <string.h>
17#include <unistd.h>
18
19#include <sys/stat.h>
20
21#include <unordered_map>
22#include "boost/io/ios_state.hpp"
23
24
25//- the following function will be called on each error on input
//- print the command line synopsis for program `prog` on stdout
//  returns -1 so that main can hand the result straight back as its exit status
int Usage( const char* prog ) {
   std::cout << "usage: " << prog << '\n';
   std::cout << " -i <input_name> : file pattern to use for input" << std::endl;
   return -1;
}
32//-
33
34//- helper classes
class CallPoint;

// code address as stored in the symbol and profile files
using Address_t = unsigned long;
// address -> symbol name
using Symbols_t = std::unordered_map< Address_t, std::string >;
// (address of the call site in the caller, address of the function called)
using CallInfo_t = std::pair< Address_t, Address_t >;
// call points one level down, keyed on their call info
using CallTree_t = std::map< CallInfo_t, CallPoint >;
40
// One node of the call tree: the counters kept for a single CallTree_t entry.
// NOTE(review): this listing skips original lines 43-45 and 47 of the class. The
// cross-reference index at the end of the listing names a `CallTree_t m_callees`
// member at HephProf.cxx:47, which the functions in this file use to reach the
// call points one level down. What lines 43-45 hold cannot be seen from here.
41class CallPoint {
42public:
46
48
// bytes attributed directly to this call point (ProcessProfile adds the record size)
49 unsigned long long m_allocated;
// number of allocations attributed to this call point (bumped together with m_allocated)
50 unsigned long long m_news;
51
// inclusive size, filled in by CalcSizeAndCalls
52 unsigned long long m_inclusive;
// number of times this call point was visited while walking the stack traces
53 unsigned long long m_called;
54};
55
// Plain record holding an allocation size; not referenced by any code visible in
// this listing.
// NOTE(review): this listing skips original line 58; the cross-reference index at
// the end of the listing names an `Address_t stacktrace[9]` member there.
56struct Record {
// allocation size (signed long, the same type ProcessProfile reads for a size)
57 long size;
59};
60//-
61
62//- data
65unsigned long long gTotalSize1 = 0;
66unsigned long long gTotalSize2 = 0;
67unsigned long long gTotalIncl = 0;
68unsigned long long gTotalCalls = 0;
69
70static std::string gUnknown = "<unknown>";
71static Address_t gMalloc = 0xCDCDCDCD;
72//-
73
74inline std::string getSymbol( Address_t address ) {
75 Symbols_t::iterator isym = gSymbols.find( address );
76
77 if ( isym != gSymbols.end() )
78 return isym->second;
79
80 return gUnknown;
81}
82
83inline bool isKnownSymbol( const CallInfo_t& call_info ) {
84 return gSymbols.find( call_info.second ) != gSymbols.end();
85}
86
87inline bool isPseudoAllocator( const CallInfo_t& call_info ) {
88 return gPseudoAllocators.find( call_info.second ) != gPseudoAllocators.end();
89}
90
91inline bool isAllocator( const CallInfo_t& call_info ) {
92 return gAllocators.find( call_info.second ) != gAllocators.end();
93}
94
95inline void WeaveCallPoints( CallPoint& cp1, const CallPoint& cp2 ) {
96 cp1.m_allocated += cp2.m_allocated;
97 cp1.m_called += cp2.m_called;
98 cp1.m_inclusive += cp2.m_inclusive;
99 cp1.m_news += cp2.m_news;
100
101 for ( CallTree_t::const_iterator icp2 = cp2.m_callees.begin();
102 icp2 != cp2.m_callees.end(); ++icp2 ) {
103
104 CallTree_t::iterator icp1 = cp1.m_callees.find( icp2->first );
105 if ( icp1 != cp1.m_callees.end() )
106 WeaveCallPoints( icp1->second, icp2->second );
107 else
108 cp1.m_callees[ icp2->first ] = icp2->second;
109 }
110}
111
112inline bool LocateAndWeave(
113 CallPoint& v1, const CallInfo_t& call_info, const CallPoint& callpoint ) {
114
115 bool found = false;
116
117 CallTree_t::iterator icpv1 = v1.m_callees.find( call_info );
118 if ( icpv1 != v1.m_callees.end() ) {
119 WeaveCallPoints( icpv1->second, callpoint );
120 found = true;
121
122 } else {
123 for ( CallTree_t::iterator icp = v1.m_callees.begin();
124 icp != v1.m_callees.end(); ++icp ) {
125 found = LocateAndWeave( icp->second, call_info, callpoint );
126 if ( found )
127 break;
128 }
129 }
130
131 return found;
132}
133
134unsigned long long CalcSizeAndCalls( const CallInfo_t& call_info, CallPoint& callpoint ) {
135 unsigned long long size = 0;
136
137 if ( isAllocator( call_info ) || isPseudoAllocator( call_info ) ) {
138
139 size = 0;
140 for ( CallTree_t::iterator icp = callpoint.m_callees.begin();
141 icp != callpoint.m_callees.end(); ++icp ) {
142 size += CalcSizeAndCalls( icp->first, icp->second );
143 }
144
145 callpoint.m_inclusive = callpoint.m_allocated;
146
147 } else {
148
149 size = callpoint.m_allocated;
150 for ( CallTree_t::iterator icp = callpoint.m_callees.begin();
151 icp != callpoint.m_callees.end(); ++icp ) {
152 size += CalcSizeAndCalls( icp->first, icp->second );
153 }
154
155 callpoint.m_inclusive = size;
156
157 }
158
159 return size;
160}
161
162void WriteCallPoint( std::ofstream& f,
163 const CallInfo_t& call_info, const CallPoint& callpoint ) {
164
165 const std::string& call_name = getSymbol( call_info.second );
166
167 f << "fl=athena.cxx\n";
168 f << "fn=" << call_name << '\n';
169 f << call_info.first << ' ' << callpoint.m_allocated
170 << ' ' << callpoint.m_news << '\n';
171
172 if ( isAllocator( call_info ) )
173 gTotalSize2 += callpoint.m_allocated;
174 else if ( ! isPseudoAllocator( call_info ) )
175 gTotalSize1 += callpoint.m_allocated;
176
177 for ( CallTree_t::const_iterator icp = callpoint.m_callees.begin();
178 icp != callpoint.m_callees.end(); ++icp ) {
179 f << "cfn=" << getSymbol( icp->first.second ) << '\n';
180 f << "calls=" << icp->second.m_called << ' ' << icp->first.second << '\n';
181 f << icp->first.first << ' ' << icp->second.m_inclusive
182 << ' ' << icp->second.m_called << '\n';
183 }
184 f << '\n';
185
186 for ( CallTree_t::const_iterator icp = callpoint.m_callees.begin();
187 icp != callpoint.m_callees.end(); ++icp ) {
188 WriteCallPoint( f, icp->first, icp->second );
189 }
190
191}
192
193void ReadSymbols( const std::string& input ) {
194// read symbol file
195 std::cout << "reading symbols from " << input << " ... " << std::endl;
196 FILE* fsyms = popen( ("gzip -dc " + input).c_str(), "r" );
197
198 Address_t l;
199 while ( fread( (void*)&l, sizeof(l), 1, fsyms ) == 1) {
200 char s[4096];
201 size_t off = 0;
202 while (true) {
203 char c = 0;
204 int stat = fread( &c, sizeof(char), 1, fsyms );
205 if (stat <= 0 || c == '\n') break;
206 if (off < sizeof(s)-1)
207 s[off++] = c;
208 }
209 s[off] = '\0';
210
211 gSymbols[ l ] = s;
212
213 if ( strcmp( s, "operator new(unsigned int)" ) == 0 )
214 gAllocators[ l ] = s;
215 else if ( strcmp( s, "operator new(unsigned int, std::nothrow_t const&)" ) == 0 )
216 gAllocators[ l ] = s;
217
218 else if ( strcmp( s, "operator new[](unsigned int)" ) == 0 )
219 gPseudoAllocators[ l ] = s;
220
221 else if ( strncmp( s, "std::", 5 ) == 0 ||
222 strncmp( s, "__gnu_cxx", 9 ) == 0 ||
223 strstr( s, "_S_construct" ) != 0 ||
224 strstr( s, "_M_allocate_and_copy" ) != 0 ) {
225 gPseudoAllocators[ l ] = s;
226 }
227
228 }
229
230// add explicitly by hand as it won't be on file
231 gAllocators[ gMalloc ] = "malloc";
232 gSymbols[ gMalloc ] = "malloc";
233
234 pclose( fsyms );
235}
236
237void ProcessProfile( const std::string& input, const std::string& output ) {
238
239 boost::io::ios_base_all_saver coutsave (std::cout);
240 struct stat statbuf;
241 if ( stat( input.c_str(), &statbuf ) != 0 ) {
242 std::cout << "failed to stat " << input << " ... exiting ... " << std::endl;
243 return;
244 }
245
246 double inv_total_bytes = 1. / static_cast<double> (statbuf.st_size);
247 {
248 unsigned long long byte_counter = 0, stack_counter = 0, total_size = 0;
249
250 // read data file
251 std::cout << "reading traces from " << input << " ... " << std::endl;
252 FILE* fprof = popen( ("gzip -dc " + input).c_str(), "r" );
253
254 long size = 0, nstack = 0, step_progress = 0, progress = 0;
255 const int maxstack = 128;
256 Address_t stacktrace[ maxstack ];
257
258 std::cout.setf( std::ios::fixed, std::ios::floatfield );
259 std::cout << std::setprecision( 2 );
260
261 std::cout << " progress: ["; std::cout.flush();
262 while ( fread( (void*)&size, sizeof(long), 1, fprof ) ) {
263 if (fread( (void*)&nstack, sizeof(long), 1, fprof ) < 1) break;
264 int nskip = 0;
265 if (nstack < 0)
266 nstack = 0;
267 else if (nstack > maxstack) {
268 nskip = nstack - maxstack;
269 nstack = maxstack;
270 }
271 if ((long)fread( (void*)stacktrace, sizeof(Address_t), nstack, fprof ) < nstack) break;
272 while (nskip > 0) {
273 Address_t dum;
274 if ((long)fread( &dum, sizeof(Address_t), 1, fprof ) < 1) break;
275 --nskip;
276 }
277
278 byte_counter += (2+nstack)*4;
279 progress = long(2.5*byte_counter * inv_total_bytes); // estimate inflator at 40x
280 if ( progress > 100 ) progress = 100;
281 if ( step_progress < progress ) {
282 for ( ; step_progress <= progress; ++step_progress ) {
283 if ( ! (step_progress % 20) ) {
284 std::cout << step_progress << "%"; std::cout.flush();
285 } else if ( ! (step_progress % 5) ) {
286 std::cout << '.'; std::cout.flush();
287 }
288 }
289 }
290
291 stack_counter += 1;
292 total_size += size;
293
294 CallInfo_t caller_info( 0, stacktrace[nstack-1] );
295 CallPoint* caller = &gCallTree[ caller_info ];
296 caller->m_called += 1;
297
298 CallInfo_t callee_info;
299
300 for ( int i = nstack-1; i > 0; --i ) {
301 // a caller is the point one up in the stack, and it calls the function at
302 // the current level; the call point one up need not always call the same
303 // function, b/c of the use of virtual function calls or function pointers,
304 // and so it alone does not fully specify a point in the call graph
305
306 callee_info = CallInfo_t( stacktrace[i], stacktrace[i-1] );
307
308 // if caller is pseudo-allocator and calling same, then skip immediately to allocator
309 if ( isPseudoAllocator( caller_info ) ) {
310 // note that this skips over things like objects that are allocated in a default
311 // constructor of another object that is called when filling an STL container (the
312 // full blame goes to the allocator of the container)
313 callee_info = CallInfo_t( stacktrace[i], stacktrace[0] );
314
315 if ( ! isKnownSymbol( callee_info ) ) {
316 // happens e.g. if an fstream file buffer is going through a system call; wire
317 // it directly onto malloc (does not particularly matter for the user, as the
318 // real blame is already upstream, but it does for the cross-check accounting)
319 callee_info.second = gMalloc;
320 }
321
322 }
323
324 CallPoint* callee = &caller->m_callees[ callee_info ];
325 callee->m_called += 1;
326
327 // if calling pseudo- or real allocator, account memory to caller
328 if ( isPseudoAllocator( callee_info ) ) {
329 caller->m_allocated += size;
330 caller->m_news += 1;
331
332 } else if ( isAllocator( callee_info ) ) {
333 caller->m_allocated += size;
334 caller->m_news += 1;
335
336 // to make sure that kcachegrind will actually show the allocator
337 callee->m_allocated += size;
338 callee->m_news += 1;
339
340 break;
341
342 } else if ( i == 1 ) {
343 // this happens if malloc is called directly (i.e. operator new is
344 // not the bottom of the stack trace), so add it explicitly
345 CallInfo_t minfo( stacktrace[i-1], gMalloc );
346 CallPoint* mpoint = &callee->m_callees[ minfo ];
347
348 callee->m_allocated += size;
349 callee->m_news += 1;
350
351 mpoint->m_allocated += size;
352 mpoint->m_news += 1;
353 mpoint->m_called += 1;
354
355 break;
356 }
357
358 caller = callee;
359 caller_info = callee_info;
360 }
361
362 }
363
364 pclose( fprof );
365
366 for ( ; step_progress <= 100; ++step_progress ) {
367 if ( ! (step_progress % 20) ) {
368 std::cout << step_progress << "%"; std::cout.flush();
369 } else if ( ! (step_progress % 5) ) {
370 std::cout << '.'; std::cout.flush();
371 }
372 }
373 std::cout << "]" << std::endl;
374 std::cout << "number of stacks: " << stack_counter << std::endl;
375 std::cout << "total alloc size: " << total_size << std::endl;
376 }
377
378 {
379 // calculate inclusive numbers
380 std::cout << "calculating sizes ... " << std::endl;
381 for ( CallTree_t::iterator icp = gCallTree.begin(); icp != gCallTree.end(); ++icp ) {
382 CalcSizeAndCalls( icp->first, icp->second );
383 }
384
385 }
386
387 {
388 // create kcachegrind file
389 std::cout << "writing output to " << output << " ... (" << gCallTree.size() << " top level entries)" << std::endl;
390
391 std::ofstream fresult( output.c_str() );
392 fresult << "events: memsize nallocs\n\n";
393
394 fresult << "fl=athena.cxx\nfn=athena\n0 0\n";
395 for ( CallTree_t::iterator icp = gCallTree.begin(); icp != gCallTree.end(); ++icp ) {
396 fresult << "cfn=" << getSymbol( icp->first.second ) << '\n';
397 fresult << "calls=" << icp->second.m_called << ' ' << icp->first.second << '\n';
398 fresult << icp->first.first << ' ' << icp->second.m_inclusive
399 << ' ' << icp->second.m_called << '\n';
400 }
401 fresult << '\n';
402
403 for ( CallTree_t::iterator icp = gCallTree.begin(); icp != gCallTree.end(); ++icp ) {
404 gTotalIncl += icp->second.m_inclusive;
405 gTotalCalls += icp->second.m_called;
406 WriteCallPoint( fresult, icp->first, icp->second );
407 }
408
409 fresult.close();
410
411 std::cout << "cross-check all calls: " << gTotalCalls << std::endl;
412 std::cout << "cross-check inclusive: " << gTotalIncl << std::endl;
413 std::cout << "cross-check-size (non-allocator): " << gTotalSize1 << std::endl;
414 std::cout << "cross-check-size (allocator): " << gTotalSize2 << std::endl;
415
416 }
417
418// done, erase data for next call
419 gCallTree.clear();
420 gTotalSize1 = 0;
421 gTotalSize2 = 0;
422 gTotalIncl = 0;
423 gTotalCalls = 0;
424}
425
426int main( int argc, char* argv [] ) {
427 int opt = -1;
428 std::string input = "hephaestus";
429
430 while ( (opt = getopt( argc, argv, "i:") ) != -1 ) {
431 switch ( opt ) {
432 case 'i':
433 input = optarg;
434 break;
435 default: /* '?' */
436 return Usage( argv[0] );
437 }
438 }
439
440 ReadSymbols( input+".symb" );
441
442 glob_t globbuf;
443 globbuf.gl_offs = 0;
444 glob( (input + "*.prof").c_str(), GLOB_DOOFFS, NULL, &globbuf );
445
446 std::cout << "files matching: " << globbuf.gl_pathc << std::endl;
447 for ( size_t ig = 0; ig < globbuf.gl_pathc; ++ig ) {
448 std::string filename = globbuf.gl_pathv[ ig ];
449 std::string::size_type firstdot = filename.find('.');
450 std::string evtnumber = filename.substr( firstdot, filename.rfind( '.' )-firstdot );
451 ProcessProfile( filename, filename.substr( 0, firstdot ) + ".cgc" + evtnumber );
452 }
453
454 globfree( &globbuf );
455
456 return 0;
457}
unsigned long long gTotalSize2
Definition HephProf.cxx:66
static std::string gUnknown
Definition HephProf.cxx:70
bool LocateAndWeave(CallPoint &v1, const CallInfo_t &call_info, const CallPoint &callpoint)
Definition HephProf.cxx:112
Symbols_t gAllocators
Definition HephProf.cxx:63
bool isAllocator(const CallInfo_t &call_info)
Definition HephProf.cxx:91
void ProcessProfile(const std::string &input, const std::string &output)
Definition HephProf.cxx:237
unsigned long long gTotalIncl
Definition HephProf.cxx:67
Symbols_t gSymbols
Definition HephProf.cxx:63
unsigned long long CalcSizeAndCalls(const CallInfo_t &call_info, CallPoint &callpoint)
Definition HephProf.cxx:134
unsigned long long gTotalCalls
Definition HephProf.cxx:68
unsigned long long gTotalSize1
Definition HephProf.cxx:65
unsigned long Address_t
Definition HephProf.cxx:36
void WriteCallPoint(std::ofstream &f, const CallInfo_t &call_info, const CallPoint &callpoint)
Definition HephProf.cxx:162
std::unordered_map< Address_t, std::string > Symbols_t
Definition HephProf.cxx:37
std::map< CallInfo_t, CallPoint > CallTree_t
Definition HephProf.cxx:39
std::string getSymbol(Address_t address)
Definition HephProf.cxx:74
void ReadSymbols(const std::string &input)
Definition HephProf.cxx:193
std::pair< Address_t, Address_t > CallInfo_t
Definition HephProf.cxx:38
void WeaveCallPoints(CallPoint &cp1, const CallPoint &cp2)
Definition HephProf.cxx:95
static Address_t gMalloc
Definition HephProf.cxx:71
bool isPseudoAllocator(const CallInfo_t &call_info)
Definition HephProf.cxx:87
bool isKnownSymbol(const CallInfo_t &call_info)
Definition HephProf.cxx:83
CallTree_t gCallTree
Definition HephProf.cxx:64
int Usage(const char *prog)
Definition HephProf.cxx:26
Symbols_t gPseudoAllocators
Definition HephProf.cxx:63
CallTree_t m_callees
Definition HephProf.cxx:47
unsigned long long m_inclusive
Definition HephProf.cxx:52
unsigned long long m_allocated
Definition HephProf.cxx:49
unsigned long long m_news
Definition HephProf.cxx:50
unsigned long long m_called
Definition HephProf.cxx:53
int main()
Definition hello.cxx:18
Address_t stacktrace[9]
Definition HephProf.cxx:58
long size
Definition HephProf.cxx:57