ATLAS Offline Software
Functions
SlurmDriver.cxx File Reference
#include <EventLoop/SlurmDriver.h>
#include <AsgMessaging/StatusCode.h>
#include <EventLoop/BatchJob.h>
#include <EventLoop/Job.h>
#include <EventLoop/ManagerData.h>
#include <EventLoop/MessageCheck.h>
#include <RootCoreUtils/Assert.h>
#include <RootCoreUtils/ThrowMsg.h>
#include <TSystem.h>
#include <fstream>
#include <memory>
#include <sstream>
#include <utility>

Go to the source code of this file.

Functions

 ClassImp (EL::SlurmDriver) namespace EL
 

Function Documentation

◆ ClassImp()

ClassImp ( EL::SlurmDriver  )
Author
Nils Krumnack

Definition at line 30 of file SlurmDriver.cxx.

33 {
34  //****************************************************
35  void SlurmDriver :: testInvariant () const
36  {
37  RCU_INVARIANT (this != 0);
38  }
39  //****************************************************
40  SlurmDriver :: SlurmDriver ()
41  {
42  m_b_job_name = false;
43  m_b_account = false;
44  m_b_run_time = false;
45 
46  RCU_NEW_INVARIANT (this);
47  }
48  //****************************************************
49  ::StatusCode SlurmDriver ::
50  doManagerStep (Detail::ManagerData& data) const
51  {
52  RCU_READ_INVARIANT (this);
53  using namespace msgEventLoop;
54  ANA_CHECK (BatchDriver::doManagerStep (data));
55  switch (data.step)
56  {
57  case Detail::ManagerStep::batchScriptVar:
58  {
59  data.batchInit = "export PATH LD_LIBRARY_PATH PYTHONPATH";
60  }
61  break;
62 
63  case Detail::ManagerStep::submitJob:
64  case Detail::ManagerStep::doResubmit:
65  {
66  auto all_set = m_b_job_name && m_b_account && m_b_run_time;
67  if (!all_set)
68  {
69  ANA_MSG_INFO ("Job Name" << m_job_name);
70  ANA_MSG_INFO ("Account " << m_account);
71  ANA_MSG_INFO ("Run Time " << m_run_time);
72 
73  ANA_MSG_ERROR("All parameters need to be set before job can be submitted");
74  return ::StatusCode::FAILURE;
75  }
76 
77  RCU_READ_INVARIANT (this);
78 
79  if (data.resubmit)
80  RCU_THROW_MSG ("resubmission not supported for this driver");
81 
82  assert (!data.batchJobIndices.empty());
83  assert (data.batchJobIndices.back() + 1 == data.batchJobIndices.size());
84  const std::size_t njob = data.batchJobIndices.size();
85 
86  if(!data.options.castBool(Job::optBatchSharedFileSystem,true))
87  {
88  int status=gSystem->CopyFile("RootCore.par",(data.submitDir+"/submit/RootCore.par").c_str());
89  if(status != 0)
90  RCU_THROW_MSG ("failed to copy RootCore.par");
91  }
92 
93  {
94  std::ofstream file ((data.submitDir + "/submit/submit").c_str());
95 
96  file << "#!/bin/bash \n";
97  file << "\n";
98  file << "#SBATCH --job-name=" << m_job_name << "\n";
99  file << "#SBATCH --output=slurm-%j.out\n";
100  file << "#SBATCH --error=slurm-%j.err\n";
101  file << "#SBATCH --account=" << m_account << "\n";
102  if(!m_partition .empty()) file << "#SBATCH --partition=" << m_partition << "\n";
103  file << "#SBATCH --time=" << m_run_time << "\n";
104  if(!m_memory .empty()) file << "#SBATCH --mem=" << m_memory << "\n";
105  if(!m_constraint.empty()) file << "#SBATCH --constraint=" << m_constraint << "\n";
106  file << "\n";
107  file << data.options.castString(Job::optBatchSlurmExtraConfigLines) << "\n";
108  file << "\n";
109  //note: no "\n" at the of this string since this goes as pre-command to the execution of the next line
110  file << data.options.castString(Job::optBatchSlurmWrapperExec);
111  file << "./run ${SLURM_ARRAY_TASK_ID}\n";
112  }
113 
114  {
115  std::ostringstream cmd;
116  cmd << "cd " << data.submitDir << "/submit && sbatch --array=0-" << njob-1 << " " << data.options.castString (Job::optSubmitFlags) << " submit";
117  if (gSystem->Exec (cmd.str().c_str()) != 0)
118  RCU_THROW_MSG (("failed to execute: " + cmd.str()).c_str());
119  }
120  data.submitted = true;
121  }
122  break;
123 
124  default:
125  break;
126  }
127  return ::StatusCode::SUCCESS;
128  }
129 
130  //****************************************************
131  void SlurmDriver :: SetJobName(std::string job_name)
132  {
133  m_b_job_name = true;
134  m_job_name = job_name;
135  }
136  void SlurmDriver :: SetAccount(std::string account)
137  {
138  m_b_account = true;
139  m_account = account;
140  }
141  void SlurmDriver :: SetPartition(std::string partition)
142  {
143  m_partition = partition;
144  }
145  void SlurmDriver :: SetRunTime(std::string run_time)
146  {
147  m_b_run_time = true;
148  m_run_time = run_time;
149  }
150  void SlurmDriver :: SetMemory(std::string memory)
151  {
152  m_memory = memory;
153  }
154  void SlurmDriver :: SetConstrain(std::string constraint)
155  {
156  m_constraint = constraint;
157  }
158  //****************************************************
159 }
data
char data[hepevt_bytes_allocation_ATLAS]
Definition: HepEvt.cxx:11
rerun_display.cmd
string cmd
Definition: rerun_display.py:67
ANA_MSG_ERROR
#define ANA_MSG_ERROR(xmsg)
Macro printing error messages.
Definition: Control/AthToolSupport/AsgMessaging/AsgMessaging/MessageCheck.h:294
ANA_CHECK
#define ANA_CHECK(EXP)
check whether the given expression was successful
Definition: Control/AthToolSupport/AsgMessaging/AsgMessaging/MessageCheck.h:324
empty
bool empty(TH1 *h)
Definition: computils.cxx:294
python.utils.AtlRunQueryMemUtil.memory
def memory(since=0.0)
Definition: AtlRunQueryMemUtil.py:30
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
ANA_MSG_INFO
#define ANA_MSG_INFO(xmsg)
Macro printing info messages.
Definition: Control/AthToolSupport/AsgMessaging/AsgMessaging/MessageCheck.h:290
file
TFile * file
Definition: tile_monitor.h:29
RCU_INVARIANT
#define RCU_INVARIANT(x)
Definition: Assert.h:201
StateLessPT_NewConfig.partition
partition
Definition: StateLessPT_NewConfig.py:49
merge.status
status
Definition: merge.py:17
RCU_THROW_MSG
#define RCU_THROW_MSG(message)
Definition: PrintMsg.h:58
RCU_READ_INVARIANT
#define RCU_READ_INVARIANT(x)
Definition: Assert.h:229
RCU_NEW_INVARIANT
#define RCU_NEW_INVARIANT(x)
Definition: Assert.h:233