# Module metadata; __usage__ and __version__ are handed to the option parser
# in the __main__ section.
__authors__ = ['Juerg Beringer', 'Carl Suster']
__version__ = 'runJobs.py atlas/athena'
# NOTE(review): the blank separator lines and the closing quotes below were
# missing from the extracted listing and have been reconstructed - confirm
# against the upstream file.
__usage__ = """%prog [options] JOBOPTIONTEMPLATE DATASET TASK

Templates: - InDetBeamSpotExample/VertexTemplate.py
           - InDetBeamSpotExample/*Template.py
           - your own template file

See the comment field of jobs (e.g. MON.DB_BEAMSPOT jobs) in the beam spot
summary webpage for real usage examples.
"""
   25 import InDetBeamSpotExample
 
   26 from InDetBeamSpotExample 
import TaskManager
 
   27 from InDetBeamSpotExample 
import DiskUtils
 
   30     ''' Reads several legacy options to work out what input data to use. ''' 
   31     if options.legacy_griduser:
 
   33     elif options.legacy_fromcastor:
 
   35         pattern = options.legacy_filter 
or (
None if options.bytestream 
else '.*ESD.*')
 
   36         fs = DiskUtils.FileSet.from_directory(inputdata).matching(pattern)
 
   37     elif os.path.isfile(inputdata):
 
   39         fs = DiskUtils.FileSet.from_file_containing_list(inputdata)
 
   40     elif options.legacy_runoverdpd:
 
   42         rundir = os.path.join(os.getcwd(), dsname)
 
   43         if not os.path.exists(rundir):
 
   44             raise Exception(
'Run ' + dsname + 
' (directory ' + rundir + 
') not found')
 
   45         dpddir = os.path.join(rundir, inputdata)
 
   46         if not os.path.exists(dpddir):
 
   47             raise Exception(
'Dataset with name ' + inputdata + 
' (directory ' + dpddir + 
') not found')
 
   48         fs = DiskUtils.FileSet.from_glob(os.path.join(dpddir, 
'*', 
'*-dpd.root*'))
 
   51         pattern = options.legacy_filter 
or '*.root*' 
   52         fs = DiskUtils.FileSet.from_glob(os.path.join(inputdata, pattern))
 
   56     ''' Use new flags to work out input file list. ''' 
   57     if options.in_directory:
 
   58         fs = DiskUtils.FileSet.from_directory(options.in_directory)
 
   60         fs = DiskUtils.FileSet.from_file_containing_list(options.in_list)
 
   62             .matching(options.f_match)
 
   63             .excluding(options.f_exclude)
 
   69             'bytestream' : options.bytestream,
 
   70             'DataSource' : 
'geant4' if options.is_mc 
else 'data',
 
   71             'evtmax' : options.evtmax,
 
   72             'maxjobs' : options.maxjobs,
 
   73             'outputlevel' : options.outputlevel,
 
   74             'logmail' : options.users,
 
   75             'alignmentfile' : options.alignmentfile,
 
   76             'beamspotfile' : options.beamspotfile,
 
   77             'autoconfparams' : options.autoconfparams,
 
   78             'taskpostprocsteps' : options.postprocsteps,
 
   79             'filesperjob' : options.nfiles,
 
   80             'lbperjob' : options.lbperjob,
 
   81             'batchqueue' : options.batchqueue,
 
   82             'gridsite' : options.gridsite,
 
   83             'addinputtopoolcatalog' : 
not (options.bytestream 
or options.submit == 
'grid' or options.legacy_griduser),
 
   91     if options.outputfilelist:
 
   92         flags[
'outputfilelist'] = [ f.strip() 
for f 
in options.outputfilelist.split(
',') ]
 
   94         flags[
'outputfilelist'] = [
'dpd.root', 
'nt.root', 
'monitoring.root', 
'beamspot.db']
 
   97         flags[
'griduser'] = options.legacy_griduser
 
   99         flags[
'griduser'] = 
'.'.
join([
'user', options.grid_user 
or os.getenv(
'USER')])
 
  101     if options.legacy_runoverdpd 
and not options.lbperjob:
 
  104     for s 
in options.params.split(
', '):
 
  108                 flags[p[0].strip()] = eval(p[1].strip())
 
  110                 print (
'\nERROR parsing user parameter', p, 
'- parameter will be ignored')
 
  115     runner_class = InDetBeamSpotExample.loadClass(runner_type)
 
  116     return runner_class(**flags)
 
  118 if __name__ == 
'__main__':
 
  119     cmd = subprocess.list2cmdline(sys.argv)
 
  121     from optparse 
import OptionParser, OptionGroup
 
  122     parser = OptionParser(usage=__usage__, version=__version__)
 
  123     parser.add_option(
'', 
'--bytestream', dest=
'bytestream', action=
'store_true', default=
False,
 
  124             help=
'input files are bytestream instead of ROOT/POOL files')
 
  125     parser.add_option(
'-m', 
'--mc', dest=
'is_mc', action=
'store_true', default=
False,
 
  126             help=
'input data is from Monte-Carlo instead of data (automatically chooses between COMP200 and OFLP200 / CONDBR2 conditions DBs)')
 
  127     parser.add_option(
'-j', 
'--maxjobs', dest=
'maxjobs', type=
'int', default=0,
 
  128             help=
'max number of jobs (default: 0 ie no maximum)')
 
  129     parser.add_option(
'', 
'--files-per-job', dest=
'nfiles', type=
'int', default=1, metavar=
'N',
 
  130             help=
'number of files per job (default: 1, set to 0 for single job over all files)')
 
  131     parser.add_option(
'-e', 
'--maxevents', dest=
'evtmax', type=
'int', default=-1,
 
  132             help=
'max number of events per job')
 
  133     parser.add_option(
'', 
'--lbperjob', dest=
'lbperjob', type=
'int', default=0, metavar=
'N',
 
  134             help=
'number of luminosity blocks per job (default: 0 - no bunching)')
 
  135     parser.add_option(
'-o', 
'--outputfilelist', dest=
'outputfilelist', default=
'', metavar=
'FILES',
 
  136             help=
'list of desired output files (default: "dpd.root,nt.root,monitoring.root,beamspot.db"; must be specified explicitly for grid)')
 
  137     parser.add_option(
'-k', 
'--taskdb', dest=
'taskdb', default=
'',
 
  138             help=
'TaskManager database (default: from TASKDB or sqlite_file:taskdata.db; set to string None to avoid using a task database)')
 
  139     parser.add_option(
'-l', 
'--logmail', dest=
'users', default=
'', metavar=
'USERS',
 
  140             help=
'send log mail to specified users (default: no mail)')
 
  141     parser.add_option(
'-z', 
'--postprocsteps', dest=
'postprocsteps', default=
'JobPostProcessing', metavar=
'STEPS',
 
  142             help=
'Task-level postprocessing steps (Default: JobPostProcessing)')
 
  143     parser.add_option(
'-t', 
'--test', dest=
'testonly', action=
'store_true', default=
False,
 
  144             help=
'show only options and input files')
 
  145     parser.add_option(
'-v', 
'--verbosity', dest=
'outputlevel', type=
'int', default=4, metavar=
'LEVEL',
 
  146             help=
'output level (default:4, where 1=VERBOSE, 2=DEBUG, 3=INFO, 4=WARNING, 5=ERROR, 6=FATAL)')
 
  147     parser.add_option(
'-p', 
'--params', dest=
'params', default=
'',
 
  148             help=
'job option parameters to pass to job option template')
 
  149     parser.add_option(
'', 
'--autoconfparams', dest=
'autoconfparams', default=
'DetDescrVersion',
 
  150             help=
'comma-separated list of automatically determined parameters (template must include AutoConfFragment.py, default: "DetDescrVersion")')
 
  154     parser.add_option(
'-a', 
'--alignment-file', dest=
'alignmentfile', default=
'', metavar=
'FILE',
 
  155             help=
'alignment file (default: none)')
 
  156     parser.add_option(
'-b', 
'--beamspot-file', dest=
'beamspotfile', default=
'', metavar=
'FILE',
 
  157             help=
'beam spot SQLite file (default: none)')
 
  159     execopt = OptionGroup(parser, 
'Execution Options')
 
  160     execopt.add_option(
'', 
'--submit', dest=
'submit', default=
'condor', metavar=
'TYPE',
 
  161             choices=[
'grid', 
'lsf', 
'shell', 
'bg', 
'pdsf', 
'simple', 
'condor'],
 
  162             help=
'submission type (default: condor, choices: grid,lsf,shell,bg,pdsf,simple,condor)')
 
  163     execopt.add_option(
'', 
'--grid-user', dest=
'grid_user', default=
None, metavar=
'USER',
 
  164             help=
'grid username (default: $USER)')
 
  165     execopt.add_option(
'', 
'--grid-site', dest=
'gridsite', default=
'AUTO', metavar=
'SITE',
 
  166             help=
'site name where jobs are sent (default: AUTO)')
 
  167     execopt.add_option(
'-q', 
'--queue', dest=
'batchqueue', default=
'atlasb1',
 
  168             help=
'batch queue (default: atlasb1)')
 
  169     parser.add_option_group(execopt)
 
  171     inopt = OptionGroup(parser, 
'Input File Options',
 
  172             "One of these must be specified.")
 
  173     inopt.add_option(
'', 
'--directory', dest=
'in_directory', metavar=
'DIR',
 
  174             help=
'run over all matching files in the directory')
 
  175     inopt.add_option(
'', 
'--file-list', dest=
'in_list', metavar=
'FILE',
 
  176             help=
'run over all matching files in the directory')
 
  177     inopt.add_option(
'', 
'--dsid', dest=
'in_dsid', metavar=
'DSID',
 
  178             help=
'run over a rucio DSID')
 
  179     parser.add_option_group(inopt)
 
  181     filtopt = OptionGroup(parser, 
'Input Filtering Options',
 
  182             "Optional filters to select input files.")
 
  183     inopt.add_option(
'', 
'--match', dest=
'f_match', default=
None, metavar=
'REGEX',
 
  184             help=
'keep only files matching the pattern')
 
  185     inopt.add_option(
'', 
'--exclude', dest=
'f_exclude', default=
None, metavar=
'REGEX',
 
  186             help=
'skip files matching the pattern')
 
  187     parser.add_option_group(filtopt)
 
  190     deprecated = OptionGroup(parser, 
'Deprecated Options')
 
  191     deprecated.add_option(
'-c', 
'--castor', dest=
'legacy_fromcastor', action=
'store_true', default=
False,
 
  192             help=
'INPUTDATA refers to CASTOR directory')
 
  193     deprecated.add_option(
'', 
'--prefix', dest=
'legacy_prefix', default=
'',
 
  194             help=
'Prefix for reading files from mass storage (ignored)')
 
  195     deprecated.add_option(
'-d', 
'--dpd', dest=
'legacy_runoverdpd', action=
'store_true', default=
False,
 
  196             help=
'run over DPD (single job, INPUTDATA is DPD task name)')
 
  197     deprecated.add_option(
'-i', 
'--interactive', dest=
'legacy_interactive', action=
'store_true', default=
False,
 
  198             help=
'run interatively (same as -r JobRunner)')
 
  199     deprecated.add_option(
'-f', 
'--filter', dest=
'legacy_filter', default=
'',
 
  200             help=
'use specified pattern to filter input files (default: *.root* for local files, .*ESD.* for castor)')
 
  201     deprecated.add_option(
'-g', 
'--grid', dest=
'legacy_griduser', default=
'',
 
  202             help=
'run on grid (GRIDUSER is user prefix of grid job name, e.g. user09.JuergBeringer; INPUTDATA is grid dataset name)')
 
  203     deprecated.add_option(
'-s', 
'--gridsite', dest=
'gridsite', default=
'AUTO',
 
  204             help=
'deprecated spelling of --grid-site')
 
  205     deprecated.add_option(
'-r', 
'--runner', dest=
'legacy_runner', default=
'LSFJobRunner',
 
  206             help=
'type of JobRunner (default: LSFJobRunner or PandaJobRunner)')
 
  207     deprecated.add_option(
'-w', 
'--wait', dest=
'legacy_dowait', action=
'store_true', default=
False,
 
  208             help=
'wait for jobs to complete')
 
  209     deprecated.add_option(
'-n', 
'--nfiles', dest=
'nfiles', type=
'int',
 
  210             help=
'deprecated spelling of --files-per-job')
 
  211     parser.add_option_group(deprecated)
 
  213     (opts,args) = parser.parse_args()
 
  214     if len(args) 
not in [3, 4]:
 
  215         parser.error(
'wrong number of command line arguments')
 
  217     joboptiontemplate = args[0]
 
  221     legacy_options = len(args) == 4
 
  223         print (
"WARNING: the four-argument invocation of runJobs is deprecated")
 
  224         print (
"WARNING: enabling (imperfect) legacy compatibility mode")
 
  226         grid_mode = 
bool(opts.legacy_griduser)
 
  227         runner_type = opts.legacy_runner
 
  229             runner_type = 
'PandaJobRunner' 
  230         if opts.legacy_interactive:
 
  231             runner_type = 
'JobRunner' 
  234         grid_mode = opts.submit == 
'grid' 
  236                 'lsf': 
'LSFJobRunner',
 
  237                 'grid': 
'PandaJobRunner',
 
  238                 'shell': 
'ShellJobRunner',
 
  239                 'bg': 
'BackgroundJobRunner',
 
  240                 'pdsf': 
'PDSFJobRunner',
 
  241                 'simple': 
'JobRunner',
 
  242                 'condor': 
'HTCondorJobRunner',
 
  246                 sys.exit(
'ERROR: For grid submission, a DSID must be given')
 
  248         sys.exit(
'ERROR: No input files found')
 
  251     flags[
'comment'] = cmd
 
  252     flags[
'inputfiles'] = files
 
  253     flags[
'joboptionpath'] = joboptiontemplate
 
  256         flags[
'inputds'] = files[0]
 
  258             sys.exit(
'ERROR: Bunching per LB not supported for grid jobs')
 
  259         if not opts.outputfilelist:
 
  260             sys.exit(
'ERROR: For grid jobs, must specify output files expclitly using option -o (e.g. -o dpd.root)')
 
  262     if opts.nfiles < 1 
or (opts.legacy_runoverdpd 
and opts.nfiles == 1):
 
  264         flags[
'filesperjob'] = len(files)
 
  266             sys.exit(
'ERROR: Must specify number of files per job explicitly when running on grid')
 
  268     workdir = os.path.join(os.getcwd(), dsname, taskname)
 
  269     flags[
'jobdir'] = os.path.join(workdir, 
'%(jobnr)03i')
 
  270     if os.path.exists(workdir):
 
  271         sys.exit(
"ERROR: Task %s exists already for dataset %s (directory %s)" % (taskname,dsname,workdir))
 
  274         flags[
'jobname'] = 
'-'.
join([dsname, taskname, 
'lb%(jobnr)03i'])
 
  276         flags[
'jobname'] = 
'-'.
join([dsname, taskname, 
'%(jobnr)03i'])
 
  277         if grid_mode 
or opts.legacy_runoverdpd:
 
  278             flags[
'jobname'] = dsname + 
'-' + taskname
 
  282     if opts.alignmentfile:
 
  283         runner.addFilesToPoolFileCatalog([opts.alignmentfile])
 
  286         runner.setParam(
'outputfileprefix',
'%(jobname)s-')
 
  287         runner.setParam(
'addinputtopoolcatalog',
False)   
 
  288         runner.registerToBeCopied(
'alignmentfile')
 
  289         runner.registerToBeCopied(
'beamspotfile')
 
  292     runner.showParams(-1)
 
  296     if grid_mode 
and opts.autoconfparams:
 
  297         print (
"WARNING: Automatic configuration of parameters such as DetDescrVersion doesn't work yet on the grid!")
 
  298         print (
"         Please be sure the values of each of the following parameters are specified explicitly above,")
 
  299         print (
"         unless the defaults in the job option template are correct:\n")
 
  300         print (
"           ", opts.autoconfparams)
 
  303     print (len(files), 
"input file(s)/dataset found.")
 
  305     if not opts.testonly:
 
  307         if opts.taskdb != 
'None':
 
  309                 with TaskManager.TaskManager(opts.taskdb) 
as taskman:
 
  310                     taskman.addTask(dsname,taskname,joboptiontemplate,runner.getParam(
'release'),runner.getNJobs(),opts.postprocsteps,comment=cmd)
 
  312                 print (
'WARNING: Unable to add task to task manager database ' + opts.taskdb)
 
  314         if opts.legacy_dowait 
and not grid_mode:
 
  315             if not opts.legacy_interactive: runner.wait()
 
  317             print (
"Job directories in %s for this task:" % workdir)
 
  319             os.system(
'ls -l %s' % workdir)
 
  321             print (
"The following output file(s) were produced:")
 
  323             print (runner.getOutputFiles())