ATLAS Offline Software
ExtractEvents.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
4 
5 
14 
15 import sys, os, argparse, subprocess, fnmatch
16 
# Command used to invoke the EOS client when building the "eos ls" listings.
# This is a bit ugly, but seems to be the only way for now.
# Background: https://cern.service-now.com/service-portal/view-request.do?n=RQF0492611
# Earlier value, kept for reference:
#   /afs/cern.ch/project/eos/installation/0.3.84-aquamarine/bin/eos.select
# Update: https://cern.service-now.com/service-portal/view-incident.do?n=INC1004372
eospath = 'eos'
22 
def validEventList(eventList):
    """Turn a comma-separated string of event numbers into a list of ints.

    eventList -- string such as "12345" or "123,456,789"

    Returns the event numbers in the order given.
    Raises argparse.ArgumentTypeError naming the first token that int()
    cannot convert.
    """
    def _toEventNumber(token):
        # int() tolerates surrounding whitespace, so " 12" is accepted
        try:
            return int(token)
        except ValueError:
            raise argparse.ArgumentTypeError("Can't convert %s to a valid event number" % token)

    return [_toEventNumber(token) for token in eventList.split(',')]
32 
def validFileFormat(formatString):
    """Check that formatString names a RAW, ESD or AOD based format.

    Returns formatString unchanged if it contains 'RAW', 'ESD' or 'AOD'
    anywhere (so e.g. 'DRAW_ZMUMU' or 'DAOD_XYZ' pass), otherwise False.
    """
    for knownFormat in ('RAW', 'ESD', 'AOD'):
        if knownFormat in formatString:
            return formatString
    return False
38 
def extractEvents(run, events, inputFiles, format, outputFile=""):
    """Extract the requested events from inputFiles with "acmd.py filter-files".

    run        -- run number (int), paired with every entry of events
    events     -- list of event numbers (ints)
    inputFiles -- list of input file names/URLs passed on to acmd.py
    format     -- file format string; formats containing 'RAW' get the
                  suffix '<format>.data', all others '<format>.pool.root'
    outputFile -- name of the output file without suffix; if empty, a name
                  is built from the run number and the event number(s)

    The output of acmd.py is echoed line by line. An output file smaller
    than 500 bytes is taken to be empty and is removed again.

    Exits with status 1 (SystemExit) if the output file already exists or if
    acmd.py cannot be started. Returns None otherwise.
    """
    # (run, event) pairs, passed as the -s selection of filter-files
    runEvents = [(run, e) for e in events]

    suffix = format + ('.data' if 'RAW' in format else '.pool.root')
    if outputFile == "":
        outputFile = "r%08d_" % run
        if len(events) > 1:
            outputFile += "multiple_events."
        else:
            outputFile += "e%012d." % events[0]
    # NOTE(review): the suffix is appended to caller-supplied names as well,
    # as the -o/--outputfile help text states - confirm this is intended.
    outputFile += suffix

    # Argument list for direct execution: no shell is involved, so file names
    # containing spaces or shell metacharacters are passed through unchanged.
    selection = str(runEvents)
    cmdArgs = ["acmd.py", "filter-files", "-o", outputFile, "-s", selection] + list(inputFiles)
    # Human-readable form of the same command, used for printing only
    cmd = "acmd.py filter-files -o %s -s '%s'%s" % (outputFile, selection, "".join(" %s" % f for f in inputFiles))

    # Refuse to touch an existing file: continuing could overwrite it, or
    # delete it below if the extraction fails.
    if os.path.isfile(outputFile):
        print ("Output file %s already exists - please remove it or choose a different name for the output file (-o/--outputfile)" % outputFile)
        sys.exit(1)

    # execute the command
    print ("Will run the following command to start the extraction")
    print (" %s" % cmd)
    try:
        # universal_newlines=True gives text lines instead of bytes
        extraction = subprocess.Popen(cmdArgs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
    except OSError as err:
        print ("Could not start acmd.py: %s" % err)
        sys.exit(1)
    # readline-based iteration echoes the log while the command is running
    for line in iter(extraction.stdout.readline, ""):
        print (line.rstrip())
    returnCode = extraction.wait()
    if returnCode != 0:
        print ("acmd.py exited with return code %d" % returnCode)

    # a failed extraction may not have produced any file at all
    if not os.path.isfile(outputFile):
        print ("No output file %s was produced - please check the log above" % outputFile)
        return

    # if the output file is less than 500 bytes, it's probably empty
    size = os.path.getsize(outputFile)
    if size < 500:
        print ("Size of output file only %d bytes, deleting it as most likely empty" % size)
        os.remove(outputFile)
    else:
        print ("Done. Output saved to %s (%.1f kB)" % (outputFile, size/1024.))
        if len(events) > 1:
            print ("NB! You requested %d events, please check the log to see that all were found" % len(events))
82 
def _eosListing(path, verbose, purpose):
    """Return the output lines of "<eospath> ls <path>" (stdout and stderr combined).

    purpose completes the verbose message "Will run the following command to
    get a list of ...". Exits with status 1 if the command cannot be started.
    """
    # eospath is used as a single executable name (no embedded arguments)
    command = [eospath, "ls", path]
    if verbose:
        print ("Will run the following command to get a list of %s" % purpose)
        print (" %s" % " ".join(command))
    try:
        # argument list instead of a shell string: names returned by EOS are
        # never interpreted by a shell; universal_newlines=True yields text
        listing = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
    except OSError as err:
        print ("Could not run %s: %s" % (eospath, err))
        sys.exit(1)
    return listing.communicate()[0].splitlines()


def main():
    """Command-line entry point.

    Parses the options, then either extracts the events from the local files
    given with -i/--inputfile, or looks up the matching dataset and the files
    covering the requested lumi block with "eos ls" and extracts from those.
    Exits with status 1 on invalid input or when no unique dataset or no file
    is found, and with status 2 (argparse) on a malformed event list.
    """
    import re  # only needed here, for parsing lumi block numbers

    parser = argparse.ArgumentParser(description='Extract events specified by run, lumi block and event number from ATLAS event files. Written by C. Ohm - feedback is very welcome!')
    parser.add_argument('-r', '--run', required=True, type=int, help='Run number')
    parser.add_argument('-lb', '--lumiblock', required=True, type=int, help='Luminosity block number')
    parser.add_argument('-e', '--events', type=str, required=True, help='Event number(s), separated by commas if several (e.g. 12345 or 123,456,789)')
    parser.add_argument('-p', '--projecttag', type=str, nargs='?', help='Project tag, defaults to "data15_13TeV"', default='data15_13TeV')
    parser.add_argument('-s', '--stream', type=str, nargs='?', help='Stream name, defaults to "physics_Main"', default='physics_Main')
    parser.add_argument('-f', '--fileformat', type=validFileFormat, help='File format: (D)RAW(_XYZ), (D)ESD(_XYZ), (D)AOD(_XYZ)', default='AOD')
    parser.add_argument('-m', '--matchingstring', type=str, nargs='?', default='', help='String for matching the dataset to look in, useful when there are several processings available, or both merged and unmerged datasets, e.g. "*merge.AOD*f620*" will do what you think')
    parser.add_argument('-o', '--outputfile', type=str, nargs='?', help='Name for the output file (appropriate suffix based on input format will be appended automatically)')
    parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Verbose mode, prints out eos paths and commands, file names, etc')
    parser.add_argument('-i', '--inputfile', nargs='+', type=str, default='', help='Local input file(s), to bypass search in EOS disk buffer')

    args = parser.parse_args()
    print (args)

    # report a malformed event list as a usage error instead of a traceback
    try:
        eventList = validEventList(args.events)
    except argparse.ArgumentTypeError as err:
        parser.error(str(err))

    # validFileFormat returns False for unknown formats; check this before the
    # format is used anywhere, including the local input file shortcut below
    if args.fileformat is False:
        print ("Unknown argument for -f/--fileformat - please provide a valid string describing the file format, i.e. one containing 'RAW', 'ESD' or 'AOD'")
        sys.exit(1)

    # options declared with nargs='?' are None when given without a value
    outputFile = args.outputfile or ""
    matchingString = args.matchingstring or ''

    # if the user provides input file(s), look for the event(s) in them!
    if args.inputfile:
        print ("Will look in specified input file(s)")
        extractEvents(args.run, eventList, args.inputfile, args.fileformat, outputFile)
        sys.exit(0)

    print ("Will try to extract event(s) %s in LB %d of run %d (project tag: %s, stream: %s, format: %s, matching string: \"%s\")" % (args.events, args.lumiblock, args.run, args.projecttag, args.stream, args.fileformat, matchingString))

    filePath = "root://eosatlas.cern.ch//eos/atlas/atlastier0/rucio/%s/%s/%08d/" % (args.projecttag, args.stream, args.run)
    datasetLines = _eosListing(filePath, args.verbose, "datasets matching the provided info")
    datasetNames = []
    if args.verbose:
        print ("Datasets before requiring match with pattern (%s):" % matchingString)
    for line in datasetLines:
        name = line.rstrip()
        if args.verbose:
            print (" %s" % name)
        # skip log file tar balls
        if "LOGARC.tar" in name:
            continue
        # RAW dataset don't have anything after "..daq.RAW"
        if (".%s." % args.fileformat) in name or (args.fileformat == "RAW" and ".RAW" in name):
            datasetNames.append(name)

    # now get rid of all datasets that don't match the matching pattern
    datasetNames = [ds for ds in datasetNames if ".LOGARC" not in ds]
    if matchingString != '':
        if args.verbose:
            print ("Removing datasets that don't match %s" % (matchingString))
        datasetNames = [ds for ds in datasetNames if fnmatch.fnmatch(ds, "*"+matchingString+"*")]

    if len(datasetNames) > 1:
        print ("More than one dataset matching the provided info")
        for ds in datasetNames:
            print (" %s" % ds)
        print ("Please provide tighter constraints, e.g. by using the -m/--matchingstring option")
        sys.exit(1)

    if len(datasetNames) == 0:
        print ("No dataset matching the provided info - please provide looser constraints and or use the -v/--verbose switch to see more info about what datasets are available in EOS")
        sys.exit(1)

    print ("Will use the following dataset found in EOS: %s" % datasetNames[0])

    datasetPath = filePath+datasetNames[0]+'/'
    fileNames = []
    # each "lb<digits>" tag in a file name is a lumi block number
    lbPattern = re.compile(r'lb(\d+)')
    for line in _eosListing(datasetPath, args.verbose, "files in the selected dataset"):
        name = line.rstrip()
        if args.verbose:
            print (name)
        lbNumbers = [int(number) for number in lbPattern.findall(name)]
        if not lbNumbers:
            # not a file name with a lumi block tag (e.g. an error message
            # from eos, which arrives on the same stream) - nothing to match
            continue
        if len(lbNumbers) == 2:
            # two tags give the first and last lumi block of a merged file
            lbs = range(lbNumbers[0], lbNumbers[1]+1)
        else:
            lbs = [lbNumbers[0]]
        if args.lumiblock in lbs:
            fileNames.append(datasetPath+name)
    if args.verbose:
        print ("Will look for the sought event(s) in the following %d file(s):" % len(fileNames))
        for name in fileNames:
            print (" %s" % name)

    # if no files are found, tell the user and stop
    if len(fileNames) == 0:
        print ("No files available in %s - will exit" % datasetPath)
        sys.exit(1)

    # actually extract the events
    extractEvents(args.run, eventList, fileNames, args.fileformat, outputFile)
184 
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
python.ExtractEvents.validEventList
def validEventList(eventList)
Definition: ExtractEvents.py:23
python.ExtractEvents.validFileFormat
def validFileFormat(formatString)
Definition: ExtractEvents.py:33
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
python.ExtractEvents.main
def main()
Definition: ExtractEvents.py:83
print
void print(char *figname, TCanvas *c1)
Definition: TRTCalib_StrawStatusPlots.cxx:25
python.ExtractEvents.extractEvents
def extractEvents(run, events, inputFiles, format, outputFile="")
Definition: ExtractEvents.py:39
str
Definition: BTagTrackIpAccessor.cxx:11