10 #include "GaudiKernel/IEvtSelector.h"
11 #include "GaudiKernel/IIoComponentMgr.h"
12 #include "GaudiKernel/IFileMgr.h"
13 #include "GaudiKernel/IChronoStatSvc.h"
14 #include "GaudiKernel/ISvcLocator.h"
15 #include "GaudiKernel/IIncidentSvc.h"
16 #include "GaudiKernel/FileIncident.h"
17 #include "GaudiKernel/Timing.h"
30 #include "yampl/SocketFactory.h"
34 ,
const std::string&
name
35 ,
const IInterface*
parent)
38 , m_nEventsBeforeFork(0)
41 , m_chronoStatSvc(
"ChronoStatSvc",
name)
42 , m_incidentSvc(
"IncidentSvc",
name)
44 , m_channel2Scatterer(
"")
45 , m_channel2EvtSel(
"")
46 , m_sharedRankQueue(0)
47 , m_sharedFailedPidQueue(0)
50 declareInterface<IAthenaMPTool>(
this);
73 return StatusCode::SUCCESS;
79 return StatusCode::SUCCESS;
82 int EvtRangeProcessor::makePool(
int,
int nprocs,
const std::string& topdir)
101 std::ostringstream rankQueueName;
102 rankQueueName <<
"EvtRangeProcessor_RankQueue_" << getpid() <<
"_" <<
m_randStr;
132 return StatusCode::SUCCESS;
144 ATH_MSG_ERROR(
"Unable to retrieve the pointer to Shared Failed PID Queue");
145 return StatusCode::FAILURE;
160 return StatusCode::FAILURE;
166 switch(itProcState->second) {
169 ATH_MSG_ERROR(
"Worker with process ID=" <<
pid <<
" failed at initialization!");
170 return StatusCode::FAILURE;
177 ATH_MSG_ERROR(
"Failed to report the crashed pid to the Event Range Scatterer");
178 return StatusCode::FAILURE;
182 if(startProcess().isSuccess()) {
189 return StatusCode::FAILURE;
200 return StatusCode::FAILURE;
210 ATH_MSG_ERROR(
"Detected unexpected state " << itProcState->second <<
" of failed worker with PID=" <<
pid);
211 return StatusCode::FAILURE;
219 ATH_MSG_ERROR(
"Failed to release the Event Range Scatterer");
220 return StatusCode::FAILURE;
233 return StatusCode::FAILURE;
243 if((
unsigned)(presult->
output.
size)>=
sizeof(
int)) {
251 ATH_MSG_ERROR(
"Unable to find PID=" << childPid <<
" in the Proc States map!");
252 return StatusCode::FAILURE;
255 ATH_MSG_DEBUG(
"Decoding the output of PID=" << childPid <<
" with the size=" <<
output.size);
261 ATH_MSG_ERROR(
"Problem scheduling execution on PID=" << childPid);
262 return StatusCode::FAILURE;
270 memcpy(&func,(
char*)
output.data+
sizeof(
int),
sizeof(func));
275 memcpy(&nevt,(
char*)
output.data+
sizeof(
int)+
sizeof(func),
sizeof(
int));
277 ATH_MSG_DEBUG(
"PID=" << childPid <<
" processed " << nevt <<
" events");
281 ATH_MSG_DEBUG(
"Added PID=" << childPid <<
" to the finalization queue");
287 ATH_MSG_ERROR(
"Problem scheduling finalization on PID=" << childPid);
288 return StatusCode::FAILURE;
291 ATH_MSG_INFO(
"Scheduled finalization of PID=" << childPid);
301 if(pidFront==childPid) {
306 ATH_MSG_ERROR(
"Failed to set the process PID=" << pidFront <<
" free");
307 return StatusCode::FAILURE;
312 ATH_MSG_DEBUG(
"PID=" << childPid <<
" removed from the queue");
317 return StatusCode::FAILURE;
326 ATH_MSG_ERROR(
"Finalized PID=" << childPid <<
" while PID=" <<
pid <<
" was expected");
327 return StatusCode::FAILURE;
336 if(
res)
return StatusCode::FAILURE;
340 return StatusCode::SUCCESS;
350 std::ostringstream ostr;
357 <<
". Number of events processed: " << ostr.str());
365 std::ostringstream workerIndex;
369 filenames.push_back(worker_rundir.string()+std::string(
"/AthenaMP.log"));
385 *(
int*)(outwork->
data) = 1;
398 std::ostringstream workindex;
406 if(
mkdir(worker_rundir.string().c_str(),S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)==-1) {
407 ATH_MSG_ERROR(
"Unable to make worker run directory: " << worker_rundir.string() <<
". " <<
fmterror(errno));
416 ATH_MSG_INFO(
"Logs redirected in the AthenaMP event worker PID=" << getpid());
423 ATH_MSG_INFO(
"Io registry updated in the AthenaMP event worker PID=" << getpid());
427 if(std::filesystem::is_regular_file(
"SimParams.db"))
428 COPY_FILE_HACK(
"SimParams.db", abs_worker_rundir.string()+
"/SimParams.db");
429 if(std::filesystem::is_regular_file(
"DigitParams.db"))
430 COPY_FILE_HACK(
"DigitParams.db", abs_worker_rundir.string()+
"/DigitParams.db");
431 if(std::filesystem::is_regular_file(
"PDGTABLE.MeV"))
432 COPY_FILE_HACK(
"PDGTABLE.MeV", abs_worker_rundir.string()+
"/PDGTABLE.MeV");
442 ATH_MSG_INFO(
"File descriptors re-opened in the AthenaMP event worker PID=" << getpid());
446 if(!
m_ioMgr->io_reinitialize().isSuccess()) {
454 IService* evtSelSvc =
dynamic_cast<IService*
>(
evtSelector());
456 ATH_MSG_ERROR(
"Failed to dyncast event selector to IService");
459 if(!evtSelSvc->start().isSuccess()) {
468 const std::list<IService*>& service_list = serviceLocator()->getServices();
469 std::list<IService*>::const_iterator itSvc = service_list.begin(),
470 itSvcLast = service_list.end();
471 for(;itSvc!=itSvcLast;++itSvc) {
472 IEvtSelector* evtsel =
dynamic_cast<IEvtSelector*
>(*itSvc);
474 if((*itSvc)->start().isSuccess())
475 ATH_MSG_DEBUG(
"Restarted event selector " << (*itSvc)->name());
477 ATH_MSG_ERROR(
"Failed to restart event selector " << (*itSvc)->name());
485 if(chdir(worker_rundir.string().c_str())==-1) {
486 ATH_MSG_ERROR(
"Failed to chdir to " << worker_rundir.string());
491 *(
int*)(outwork->
data) = 0;
497 ATH_MSG_INFO(
"Exec function in the AthenaMP worker PID=" << getpid());
500 int nEventsProcessed(0);
502 std::queue<std::string> queueTokens;
505 yampl::ISocketFactory* socketFactory =
new yampl::SocketFactory();
507 yampl::ISocket* socket2Scatterer = socketFactory->createClientSocket(
yampl::Channel(socket2ScattererName,yampl::LOCAL),yampl::MOVE_DATA);
508 ATH_MSG_INFO(
"Created CLIENT socket to the Scatterer: " << socket2ScattererName);
509 std::ostringstream pidstr;
513 std::string ping = pidstr.str() + std::string(
" ready for event processing");
516 void* message2scatterer =
malloc(ping.size());
517 memcpy(message2scatterer,ping.data(),ping.size());
518 socket2Scatterer->send(message2scatterer,ping.size());
519 ATH_MSG_INFO(
"Sent a welcome message to the Scatterer");
523 char *responseBuffer(0);
524 std::string strPeerId;
525 ssize_t responseSize = socket2Scatterer->recv(responseBuffer,strPeerId);
527 if(responseSize==1) {
528 ATH_MSG_INFO(
"Empty range received. Terminating the loop");
532 std::string responseStr(responseBuffer,responseSize);
533 ATH_MSG_INFO(
"Received response from the Scatterer : " << responseStr);
536 System::ProcessTime time_start = System::getProcessTime();
539 size_t endpos = responseStr.find(
',');
540 while(endpos!=std::string::npos) {
541 queueTokens.push(responseStr.substr(startpos,endpos-startpos));
543 endpos = responseStr.find(
',',startpos);
545 queueTokens.push(responseStr.substr(startpos));
547 std::string rangeID = queueTokens.front();
567 if(queueTokens.front().find(
"PFN:")==0) {
570 filename = queueTokens.front().substr(4);
572 ATH_MSG_WARNING(
"Failed to set input file for the range: " << rangeID);
582 int startEvent =
std::atoi(queueTokens.front().c_str());
584 int endEvent =
std::atoi(queueTokens.front().c_str());
587 <<
", First Event:" << startEvent
588 <<
", Last Event:" << endEvent);
591 IEvtSelector::Context* ctx =
nullptr;
592 if (
evtSelector()->createContext (ctx).isFailure()) {
597 for(
int i(startEvent-1);
i<endEvent; ++
i) {
599 if(
sc.isRecoverable()) {
600 ATH_MSG_WARNING(
"Event " <<
i <<
" from range: " << rangeID <<
" not in the input file");
604 else if(
sc.isFailure()) {
615 ATH_MSG_WARNING(
"Failed to process the event " <<
i <<
" in range:" << rangeID);
625 if (
evtSelector()->releaseContext (ctx).isFailure()) {
637 std::string strOutpFile;
639 for(std::filesystem::directory_iterator fdIt(std::filesystem::current_path()); fdIt!=std::filesystem::directory_iterator(); fdIt++) {
640 if(fdIt->path().string().rfind(rangeID) == fdIt->path().string().size()-rangeID.size()) {
641 if(strOutpFile.empty()) {
642 strOutpFile = fdIt->path().string();
645 strOutpFile += (std::string(
",")+fdIt->path().string());
651 System::ProcessTime time_delta = System::getProcessTime() - time_start;
654 if(!strOutpFile.empty()) {
659 std::ostringstream outputReportStream;
660 outputReportStream << strOutpFile
662 <<
",CPU:" << time_delta.cpuTime<System::Sec>()
663 <<
",WALL:" << time_delta.elapsedTime<System::Sec>();
664 std::string outputFileReport = outputReportStream.str();
667 message2scatterer =
malloc(outputFileReport.size());
668 memcpy(message2scatterer,outputFileReport.data(),outputFileReport.size());
669 socket2Scatterer->send(message2scatterer,outputFileReport.size());
670 ATH_MSG_INFO(
"Reported the output " << outputFileReport);
674 ATH_MSG_WARNING(
"Failed to make an output file for range: " << rangeID);
690 memcpy((
char*)
outdata+
sizeof(
int),&func,
sizeof(func));
691 memcpy((
char*)
outdata+
sizeof(
int)+
sizeof(func),&nEventsProcessed,
sizeof(
int));
694 outwork->
size = outsize;
700 delete socket2Scatterer;
701 delete socketFactory;
708 ATH_MSG_INFO(
"Fin function in the AthenaMP worker PID=" << getpid());
714 if(
m_appMgr->finalize().isFailure()) {
715 std::cout <<
"Unable to finalize AppMgr" << std::endl;
726 memcpy((
char*)
outdata+
sizeof(
int),&func,
sizeof(func));
728 memcpy((
char*)
outdata+
sizeof(
int)+
sizeof(func),&nEvt,
sizeof(
int));
731 outwork->
size = outsize;
743 return StatusCode::FAILURE;
749 return StatusCode::FAILURE;
754 return StatusCode::FAILURE;
758 return StatusCode::SUCCESS;
763 if(
m_inpFile == newFile)
return StatusCode::SUCCESS;
766 IProperty* propertyServer =
dynamic_cast<IProperty*
>(
evtSelector());
767 if(!propertyServer) {
768 ATH_MSG_ERROR(
"Unable to dyn-cast the event selector to IProperty");
769 return StatusCode::FAILURE;
772 std::string propertyName(
"InputCollections");
774 std::vector<std::string> vect;
775 StringArrayProperty inputFileList(propertyName, vect);
776 if(propertyServer->getProperty(&inputFileList).isFailure()) {
777 ATH_MSG_ERROR(
"Failed to get InputCollections property value of the Event Selector");
778 return StatusCode::FAILURE;
780 if(newFile==inputFileList.value()[0]) {
782 return StatusCode::SUCCESS;
785 std::vector<std::string> vect{newFile,};
786 StringArrayProperty newInputFileList(propertyName, vect);
787 if(propertyServer->setProperty(newInputFileList).isFailure()) {
788 ATH_MSG_ERROR(
"Unable to update " << newInputFileList.name() <<
" property on the Event Selector");
789 return StatusCode::FAILURE;
792 return StatusCode::SUCCESS;
799 void* message2scatterer =
malloc(messageSize);
800 memcpy(message2scatterer,&
pid,
sizeof(
pid_t));
802 socket->send(message2scatterer,messageSize);