ATLAS Offline Software
SubmitDirManager.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration
3 */
4 
6 
7 
8 
9 //
10 // includes
11 //
12 
14 
16 #include <EventLoop/Job.h>
17 #include <EventLoop/ManagerData.h>
18 #include <EventLoop/ManagerOrder.h>
19 #include <EventLoop/MessageCheck.h>
20 #include <RootCoreUtils/Assert.h>
21 #include <TSystem.h>
22 #include <format>
23 #include <boost/functional/hash.hpp>
24 #include <fcntl.h>
25 #include <regex>
26 #include <sys/stat.h>
27 #include <sys/time.h>
28 #include <sys/types.h>
29 #include <time.h>
30 #include <unistd.h>
31 
32 
33 using namespace EL::msgEventLoop;
34 
35 //
36 // method implementations
37 //
38 
39 namespace EL
40 {
41  namespace Detail
42  {
43  namespace
44  {
45  void reportErrno (int myerrno)
46  {
47  char error [160];
48  strerror_r (myerrno, error, sizeof (error));
49  ANA_MSG_ERROR ("encountered system error: " << error);
50  }
51 
52  void reportErrno ()
53  {
54  reportErrno (errno);
55  }
56  }
57 
58 
59 
60  std::pair<Detail::ManagerOrder,std::string> SubmitDirManager ::
61  getManagerOrder () const noexcept
62  {
63  return std::make_pair (ManagerOrder::SUBMIT_DIR, "");
64  }
65 
66 
67 
namespace
{
  // Quote `text` so that a POSIX shell treats it as one literal
  // word: wrap it in single quotes and spell every embedded single
  // quote as '\'' (close quote, escaped quote, reopen quote).
  std::string shellQuote (const std::string& text)
  {
    std::string quoted {"'"};
    for (const char ch : text)
    {
      if (ch == '\'')
        quoted += "'\\''";
      else
        quoted += ch;
    }
    quoted += '\'';
    return quoted;
  }
}



// Perform the submit-directory part of the manager step selected by
// `data.step`:
//
//  - updateSubmitDir: make `data.submitDir` absolute (relative to
//    gSystem->WorkingDirectory()), strip a trailing "/" or "/.",
//    reject a trailing "/..", textually collapse "/./", "//", a
//    leading "/../" and "name/../", and reject paths starting with
//    "/pnfs/".
//  - extractOptions: translate Job::optSubmitDirMode and
//    Job::optRemoveSubmitDir into `data.submitDirMode`.
//  - createSubmitDir: create the directory according to
//    `data.submitDirMode` (at most 10 attempts), for UNIQUE_LINK also
//    create a symlink at the original location, and store the path of
//    the created directory in `data.submitDir`.
//
// All other steps are no-ops.  Returns FAILURE (after logging the
// reason) if anything goes wrong, SUCCESS otherwise.
::StatusCode SubmitDirManager ::
doManagerStep (Detail::ManagerData& data) const
{
  switch (data.step)
  {
  case ManagerStep::updateSubmitDir:
    {
      std::smatch match;

      // make sure directory is absolute
      if (data.submitDir[0] != '/')
        data.submitDir = gSystem->WorkingDirectory () + ("/" + data.submitDir);

      // make sure we don't end in "/", "/." or "/.."
      const std::regex identityEndExpr {"(/$)|(/\\.$)"};
      while (std::regex_search (data.submitDir, match, identityEndExpr))
        data.submitDir.replace (match.position(0), match.length(0), "");
      const std::regex relativeEndExpr {"/\\.\\.$"};
      if (std::regex_search (data.submitDir, match, relativeEndExpr))
      {
        ANA_MSG_ERROR ("submit directory can not end in \"..\": " + data.submitDir);
        return ::StatusCode::FAILURE;
      }

      // make sure we don't include any '..' in our path.  this is
      // mainly relevant for Ganga Tasks which may crash
      // otherwise.  this specifically doesn't use any of the
      // "normal" regularization functions because those resolve
      // symlinks as well, and depending on the driver we may not
      // want that to happen.
      const std::regex identityExpr {"(/\\./)|(//)|(^/\\.\\./)"};
      const std::regex relativeExpr {"/[^/]+/\\.\\./"};
      // collapsing "name/../" can expose a new leading "/../" (e.g.
      // "/a/../../b" -> "/../b"), so repeat both passes until neither
      // finds anything.  every replacement shortens the string, so
      // this terminates.
      bool changed {true};
      while (changed)
      {
        changed = false;
        while (std::regex_search (data.submitDir, match, identityExpr))
        {
          data.submitDir.replace (match.position(0), match.length(0), "/");
          changed = true;
        }
        while (std::regex_search (data.submitDir, match, relativeExpr))
        {
          data.submitDir.replace (match.position(0), match.length(0), "/");
          changed = true;
        }
      }

      if (data.submitDir.find ("/pnfs/") == 0)
      {
        ANA_MSG_ERROR ("can not place submit directory on pnfs: " + data.submitDir);
        return ::StatusCode::FAILURE;
      }

      ANA_MSG_DEBUG ("changed submit-dir to " << data.submitDir);
    }
    break;

  case ManagerStep::extractOptions:
    {
      std::string mode = data.options.castString (Job::optSubmitDirMode);
      if (!mode.empty())
      {
        if (mode == "no-clobber")
          data.submitDirMode = SubmitDirMode::NO_CLOBBER;
        else if (mode == "overwrite")
          data.submitDirMode = SubmitDirMode::OVERWRITE;
        else if (mode == "unique")
          data.submitDirMode = SubmitDirMode::UNIQUE;
        else if (mode == "unique-link")
          data.submitDirMode = SubmitDirMode::UNIQUE_LINK;
        else
        {
          ANA_MSG_ERROR ("unknown submit-dir mode: " << mode);
          ANA_MSG_ERROR ("known modes: no-clobber, overwrite, unique, unique-link");
          return ::StatusCode::FAILURE;
        }
      }

      // optRemoveSubmitDir is a shorthand for the overwrite mode and
      // is rejected when an explicit mode is given as well
      if (data.options.castBool (Job::optRemoveSubmitDir, false))
      {
        if (!mode.empty())
        {
          ANA_MSG_ERROR ("can't specify both an explicit submit-dir mode and optRemoveSubmitDir");
          return ::StatusCode::FAILURE;
        }
        data.submitDirMode = SubmitDirMode::OVERWRITE;
      }
    }
    break;

  case ManagerStep::createSubmitDir:
    {
      ANA_MSG_DEBUG ("using submit-dir mode: " << unsigned (data.submitDirMode));

      bool success {false};
      unsigned tries {0};
      std::string submitDir;
      // deliberately kept across attempts: each retry mixes more
      // values into it, so a retry yields a different suffix
      std::size_t hash {0};
      while (success == false && tries < 10)
      {
        tries += 1;

        switch (data.submitDirMode)
        {
        case SubmitDirMode::NO_CLOBBER:
        case SubmitDirMode::OVERWRITE:
          submitDir = data.submitDir;
          break;

        case SubmitDirMode::UNIQUE:
        case SubmitDirMode::UNIQUE_LINK:
          {
            timeval tv;
            tm tvSplit;
            if (gettimeofday (&tv, nullptr) == -1 ||
                localtime_r (&tv.tv_sec, &tvSplit) == nullptr)
            {
              reportErrno ();
              ANA_MSG_ERROR ("failed to get time of day???");
              return ::StatusCode::FAILURE;
            }
            const std::string uniqueDateFormat {
              data.options.castString (Job::optUniqueDateFormat,
                                       "-%Y-%m-%d-%H%M-")};
            char timeString [160];
            // strftime() returns 0 when the result is empty or does
            // not fit, and in the latter case the buffer content is
            // unspecified.  fall back to an empty date string rather
            // than appending an indeterminate buffer.
            if (strftime (timeString, sizeof (timeString),
                          uniqueDateFormat.c_str(), &tvSplit) == 0)
              timeString[0] = '\0';

            // make a hash value and reduce it to 16 bits
            boost::hash_combine (hash, std::hash<pid_t>() (getpid()));
            boost::hash_combine (hash, std::hash<suseconds_t>() (tv.tv_usec));
            std::size_t hash16 {hash};
            while (hash16 > 0xffff)
              hash16 = (hash16&0xffff) ^ (hash16 >> 16);

            // we are appending both a date and a unique hash
            // here, in most cases the date should be sufficient
            // to make it unique, but for unit tests a lot of jobs
            // may be submitted in rapid succession, so having
            // some random suffix should help avoid clashes.  the
            // date is purposely put first because in some cases
            // that can provide a useful ordering of output
            // directories, i.e. the latest results will be listed
            // last.
            submitDir = data.submitDir + timeString +
              std::format("{:04x}", hash16);
            ANA_MSG_DEBUG ("unique submit-dir: " << submitDir);
          }
          break;
        }

        if (::mkdir (submitDir.c_str(), 0777) == 0)
        {
          success = true;
        } else
        {
          // save errno before any further call can overwrite it
          const int myerrno {errno};
          if (myerrno == EEXIST)
          {
            switch (data.submitDirMode)
            {
            case SubmitDirMode::NO_CLOBBER:
              ANA_MSG_ERROR ("cowardly refusing to overwrite " << submitDir);
              ANA_MSG_ERROR ("change the name or remove file/directory already there");
              return ::StatusCode::FAILURE;
            case SubmitDirMode::OVERWRITE:
              // the removal is attempted only once; if the path
              // still exists on the second attempt we give up
              if (tries > 1)
              {
                ANA_MSG_ERROR ("failed to remove directory " << submitDir);
                ANA_MSG_ERROR ("please try to remove it manually");
                return ::StatusCode::FAILURE;
              }
              ANA_MSG_DEBUG ("removing directory " << submitDir);
              // the path goes through the shell, so quote it to keep
              // spaces and shell metacharacters literal
              gSystem->Exec (("rm -rf " + shellQuote (submitDir)).c_str());
              break;
            case SubmitDirMode::UNIQUE:
            case SubmitDirMode::UNIQUE_LINK:
              // just pass through, try again with a new directory
              // name next time
              break;
            }
          } else
          {
            reportErrno (myerrno);
            ANA_MSG_ERROR ("failed to create directory: " << submitDir);
            return ::StatusCode::FAILURE;
          }
        }
      }

      if (success)
      {
        ANA_MSG_INFO ("created submission directory " + submitDir);
        switch (data.submitDirMode)
        {
        case SubmitDirMode::OVERWRITE:
        case SubmitDirMode::NO_CLOBBER:
        case SubmitDirMode::UNIQUE:
          // no-op
          break;

        case SubmitDirMode::UNIQUE_LINK:
          {
            // remove whatever is at the requested location (it not
            // existing is fine) and put a symlink to the newly
            // created directory there
            if (unlink (data.submitDir.c_str()) == -1 && errno != ENOENT)
            {
              reportErrno ();
              ANA_MSG_ERROR ("failed to remove: " << data.submitDir);
              return ::StatusCode::FAILURE;
            }

            // link target is just the last path component, i.e. the
            // link is relative to the directory containing it
            std::string file = submitDir.substr (submitDir.rfind ('/')+1);
            if (symlink (file.c_str(), data.submitDir.c_str()) == -1)
            {
              reportErrno ();
              ANA_MSG_ERROR ("failed to create symlink at: " << data.submitDir);
              return ::StatusCode::FAILURE;
            }
            ANA_MSG_INFO ("created sym-link at: " << data.submitDir);
          }
          break;
        }
        data.submitDir = submitDir;
      } else
      {
        ANA_MSG_ERROR ("tried " << tries << " times to create directory and failed: " << data.submitDir);
        ANA_MSG_ERROR ("try removing existing directory manually");
        return ::StatusCode::FAILURE;
      }
    }
    break;

  default:
    // no-op
    break;
  }
  return ::StatusCode::SUCCESS;
}
295  }
296 }
data
char data[hepevt_bytes_allocation_ATLAS]
Definition: HepEvt.cxx:11
vtune_athena.format
format
Definition: vtune_athena.py:14
plot_material.mkdir
def mkdir(path, recursive=True)
Definition: plot_material.py:16
EL::Job::optRemoveSubmitDir
static const std::string optRemoveSubmitDir
description: the name of the option for overwriting the submission directory.
Definition: Job.h:189
Job.h
ANA_MSG_ERROR
#define ANA_MSG_ERROR(xmsg)
Macro printing error messages.
Definition: Control/AthToolSupport/AsgMessaging/AsgMessaging/MessageCheck.h:294
Assert.h
const
bool const RAWDATA *ch2 const
Definition: LArRodBlockPhysicsV0.cxx:560
MessageCheck.h
SubmitDirManager.h
PrepareReferenceFile.regex
regex
Definition: PrepareReferenceFile.py:43
ManagerOrder.h
FullCPAlgorithmsTest_eljob.submitDir
submitDir
Definition: FullCPAlgorithmsTest_eljob.py:160
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
ManagerData.h
ANA_MSG_INFO
#define ANA_MSG_INFO(xmsg)
Macro printing info messages.
Definition: Control/AthToolSupport/AsgMessaging/AsgMessaging/MessageCheck.h:290
file
TFile * file
Definition: tile_monitor.h:29
EL
This module defines the arguments passed from the BATCH driver to the BATCH worker.
Definition: AlgorithmWorkerData.h:24
Preparation.mode
mode
Definition: Preparation.py:94
StatusCode.h
xAOD::DiTauJetParameters::Detail
Detail
Definition: DiTauDefs.h:38
python.AtlCoolLib.timeString
def timeString(iovkey)
Definition: AtlCoolLib.py:120
CaloCondBlobAlgs_fillNoiseFromASCII.hash
dictionary hash
Definition: CaloCondBlobAlgs_fillNoiseFromASCII.py:109
EL::Detail::ManagerData
an internal data structure for passing data between different manager objects and steps
Definition: ManagerData.h:46
EL::Job::optSubmitDirMode
static const std::string optSubmitDirMode
the submit-dir mode (allowed values: "no-clobber", "overwrite", "unique", "unique-link")
Definition: Job.h:195
error
Definition: IImpactPoint3dEstimator.h:70
EL::Job::optUniqueDateFormat
static const std::string optUniqueDateFormat
the date-format to use when generating unique submission directory names
Definition: Job.h:200
match
bool match(std::string s1, std::string s2)
match the individual directories of two strings
Definition: hcg.cxx:356
ANA_MSG_DEBUG
#define ANA_MSG_DEBUG(xmsg)
Macro printing debug messages.
Definition: Control/AthToolSupport/AsgMessaging/AsgMessaging/MessageCheck.h:288