ATLAS Offline Software
Loading...
Searching...
No Matches
SlurmDriver.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration
3*/
4
6
7
8//
9// includes
10//
11
13
15#include <EventLoop/BatchJob.h>
16#include <EventLoop/Job.h>
21#include <TSystem.h>
22#include <fstream>
23#include <memory>
24#include <sstream>
25
26//
27// method implementations
28//
29
31
32namespace EL
33{
34 //****************************************************
35 void SlurmDriver :: testInvariant () const
36 {
37 RCU_INVARIANT (this != 0);
38 }
39 //****************************************************
40 SlurmDriver :: SlurmDriver ()
41 {
42 m_b_job_name = false;
43 m_b_account = false;
44 m_b_run_time = false;
45
46 RCU_NEW_INVARIANT (this);
47 }
48 //****************************************************
// Performs the SLURM-specific work for the manager step held in data.step.
//
// Two branches are visible in the switch below:
//  - the first stores the environment export line in data.batchInit;
//  - the second checks that job name, account and run time have all been
//    set, writes the sbatch script <submitDir>/submit/submit, runs
//    `sbatch --array=0-<njob-1>` from <submitDir>/submit and finally sets
//    data.submitted.
// Any other step falls through to the empty default branch.
//
// Returns ::StatusCode::FAILURE when one of the three mandatory parameters
// is unset, ::StatusCode::SUCCESS otherwise. Resubmission requests, a failed
// copy of RootCore.par and a non-zero sbatch exit status are reported
// through RCU_THROW_MSG.
//
// NOTE(review): this listing jumps over Doxygen line numbers 54, 57 and
// 63-64, so at least the `case` labels of the two switch branches are not
// visible here — confirm against the original file before editing the code.
49 ::StatusCode SlurmDriver ::
50 doManagerStep (Detail::ManagerData& data) const
51 {
52 RCU_READ_INVARIANT (this);
53 using namespace msgEventLoop;
55 switch (data.step)
56 {
// NOTE(review): the case label for this branch is missing from the listing.
58 {
59 data.batchInit = "export PATH LD_LIBRARY_PATH PYTHONPATH";
60 }
61 break;
62
// NOTE(review): the case label(s) for this branch are missing from the listing.
65 {
// Submission is refused unless SetJobName, SetAccount and SetRunTime were all called.
66 auto all_set = m_b_job_name && m_b_account && m_b_run_time;
67 if (!all_set)
68 {
// NOTE(review): "Job Name" has no trailing space, unlike the two labels
// below, so label and value are printed run together.
69 ANA_MSG_INFO ("Job Name" << m_job_name);
70 ANA_MSG_INFO ("Account " << m_account);
71 ANA_MSG_INFO ("Run Time " << m_run_time);
72
73 ANA_MSG_ERROR("All parameters need to be set before job can be submitted");
74 return ::StatusCode::FAILURE;
75 }
76
77 RCU_READ_INVARIANT (this);
78
79 if (data.resubmit)
80 RCU_THROW_MSG ("resubmission not supported for this driver");
81
// The last job index must equal size-1, consistent with the 0..njob-1
// array range handed to sbatch further down.
82 assert (!data.batchJobIndices.empty());
83 assert (data.batchJobIndices.back() + 1 == data.batchJobIndices.size());
84 const std::size_t njob = data.batchJobIndices.size();
85
// When the shared-file-system option (default true) is false, RootCore.par
// is copied from the current directory into the submit directory.
86 if(!data.options.castBool(Job::optBatchSharedFileSystem,true))
87 {
88 int status=gSystem->CopyFile("RootCore.par",(data.submitDir+"/submit/RootCore.par").c_str());
89 if(status != 0)
90 RCU_THROW_MSG ("failed to copy RootCore.par");
91 }
92
// Write the sbatch script; the stream is closed when this scope ends.
// NOTE(review): the stream state is never checked, so a failed open or
// write is not detected here.
93 {
94 std::ofstream file ((data.submitDir + "/submit/submit").c_str());
95
96 file << "#!/bin/bash \n";
97 file << "\n";
98 file << "#SBATCH --job-name=" << m_job_name << "\n";
99 file << "#SBATCH --output=slurm-%j.out\n";
100 file << "#SBATCH --error=slurm-%j.err\n";
101 file << "#SBATCH --account=" << m_account << "\n";
// partition, memory and constraint are optional: emitted only when non-empty
102 if(!m_partition .empty()) file << "#SBATCH --partition=" << m_partition << "\n";
103 file << "#SBATCH --time=" << m_run_time << "\n";
104 if(!m_memory .empty()) file << "#SBATCH --mem=" << m_memory << "\n";
105 if(!m_constraint.empty()) file << "#SBATCH --constraint=" << m_constraint << "\n";
106 file << "\n";
107 file << data.options.castString(Job::optBatchSlurmExtraConfigLines) << "\n";
108 file << "\n";
109 // note: no "\n" at the end of this string, since it goes as a pre-command in front of the next line
110 file << data.options.castString(Job::optBatchSlurmWrapperExec);
111 file << "./run ${SLURM_ARRAY_TASK_ID}\n";
112 }
113
// Submit the script as one job array with task ids 0..njob-1.
// NOTE(review): submitDir and the submit flags are interpolated into the
// shell command unquoted — presumably paths without spaces are assumed.
114 {
115 std::ostringstream cmd;
116 cmd << "cd " << data.submitDir << "/submit && sbatch --array=0-" << njob-1 << " " << data.options.castString (Job::optSubmitFlags) << " submit";
117 if (gSystem->Exec (cmd.str().c_str()) != 0)
118 RCU_THROW_MSG (("failed to execute: " + cmd.str()).c_str());
119 }
120 data.submitted = true;
121 }
122 break;
123
124 default:
125 break;
126 }
127 return ::StatusCode::SUCCESS;
128 }
129
130 //****************************************************
131 void SlurmDriver :: SetJobName(std::string job_name)
132 {
133 m_b_job_name = true;
134 m_job_name = job_name;
135 }
136 void SlurmDriver :: SetAccount(std::string account)
137 {
138 m_b_account = true;
139 m_account = account;
140 }
141 void SlurmDriver :: SetPartition(std::string partition)
142 {
143 m_partition = partition;
144 }
145 void SlurmDriver :: SetRunTime(std::string run_time)
146 {
147 m_b_run_time = true;
148 m_run_time = run_time;
149 }
150 void SlurmDriver :: SetMemory(std::string memory)
151 {
152 m_memory = memory;
153 }
154 void SlurmDriver :: SetConstrain(std::string constraint)
155 {
156 m_constraint = constraint;
157 }
158 //****************************************************
159}
#define RCU_INVARIANT(x)
Definition Assert.h:201
#define RCU_NEW_INVARIANT(x)
Definition Assert.h:233
#define RCU_READ_INVARIANT(x)
Definition Assert.h:229
#define ANA_MSG_INFO(xmsg)
Macro printing info messages.
#define ANA_MSG_ERROR(xmsg)
Macro printing error messages.
#define ANA_CHECK(EXP)
check whether the given expression was successful
char data[hepevt_bytes_allocation_ATLAS]
Definition HepEvt.cxx:11
#define RCU_THROW_MSG(message)
Definition PrintMsg.h:58
ClassImp(EL::SlurmDriver) namespace EL
static const Attributes_t empty
virtual::StatusCode doManagerStep(Detail::ManagerData &data) const override
static const std::string optBatchSharedFileSystem
description: batch-specific options rationale: these options are for configuring batch drivers
Definition Job.h:529
static const std::string optSubmitFlags
description: the name of the option for supplying extra submit parameters to batch systems rationale:...
Definition Job.h:296
static const std::string optBatchSlurmWrapperExec
Append a command before the main executable is called. This is useful if you want to execute the comma...
Definition Job.h:535
static const std::string optBatchSlurmExtraConfigLines
The content of this string will be executed in the job script on the worker node before the main exec...
Definition Job.h:532
a Driver for running on SLURM batch systems
Definition SlurmDriver.h:23
@ doResubmit
call the actual doResubmit method
@ submitJob
do the actual job submission
Definition ManagerStep.h:92
@ batchScriptVar
create the variables needed for the batch-run script
Definition ManagerStep.h:83
This module defines the arguments passed from the BATCH driver to the BATCH worker.
TFile * file