ATLAS Offline Software
CheckSampleStatus.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration
4 
5 
14 
# Default tags used for the sample status check.
# For MC15c, see https://twiki.cern.ch/twiki/bin/view/AtlasProtected/AtlasProductionGroupMC15c

# Appended (followed by "*") to the rucio query pattern; the query output is
# then narrowed further with the tag lists below.
defaultRecoTagFilteringPattern = "r7"

# MC15 standard simulation tags.
defaultSimTags = ["s2608", "s2726"]

# Reconstruction tags. This is the variant the original author marked
# "for AF-II, enable this line". The r-tag-only alternative (r-tags are
# combined with the s-tags above) is:
#     defaultRecoTags = ["r7772", "r7725"]
defaultRecoTags = ["r7772", "r7725", "a818", "a821"]

# Derivation tags, in order of priority (in case several are available).
defaultDerivationTags = ["p2622", "p2623", "p2613", "p2614"]
22 
23 import sys, os, argparse, subprocess, re, pickle
24 
def runCommand(cmd, verbose = False):
    """Run *cmd* through the shell and return its combined output stream.

    Args:
        cmd: Shell command line to execute (may contain pipes).
        verbose: If true, print the command before running it.

    Returns:
        The child's stdout pipe (stderr is merged into it), opened in text
        mode so that ``readlines()`` yields ``str`` on Python 2 and 3 alike.
        The child is not waited on here; callers read the pipe until EOF.
    """
    if verbose:
        print(" Will run the following command: %s" % cmd)
    # NOTE(review): shell=True is needed because callers pass pipelines
    # ("... | sort -r", "... | grep ..."), but the command text is built from
    # user-supplied DSIDs/dataset names -- only feed this trusted input.
    process = subprocess.Popen(cmd,
                               shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               universal_newlines=True)
    return process.stdout
30 
def getSamplesFromPattern(dsid,
                          projectTag = "mc15_13TeV",
                          simTags = defaultSimTags,
                          recoTags = defaultRecoTags,
                          verbose = False,
                          format = "DAOD_SUSY1",
                          pTags = None,
                          derivation = ""):
    """Look up the AOD and DAOD containers for one DSID via rucio.

    Args:
        dsid: Dataset ID, inserted into the rucio query pattern.
        projectTag: Project prefix of the dataset names (also the rucio scope
            prefix that is stripped from each output line).
        simTags: Accepted simulation tags (optional in the dataset name).
        recoTags: Accepted reconstruction tags (one of them is required).
        verbose: If true, print the command, the regexes and all matches.
        format: Derivation format; substituted for "AOD" in the AOD regex to
            obtain the DAOD regex.
        pTags: Accepted derivation tags in order of priority. ``None`` (the
            default) means no restriction.
        derivation: Currently unused; kept for interface compatibility.

    Returns:
        A dict ``{"AOD": name, "DAOD": name}`` holding the first matching
        dataset of each kind, or the placeholder ``" N/A "`` if none matched.
    """
    if pTags is None:
        pTags = []

    pattern = projectTag + '.' + dsid +'.*AOD*'

    # do the rucio query
    queryPattern = pattern + defaultRecoTagFilteringPattern + "*"
    cmd = "rucio ls --short --filter type=CONTAINER %s | sort -r " % (queryPattern) # sort -r prioritizes higher e-tags
    queryResult = runCommand(cmd, verbose)

    # preprocess the output: drop trailing whitespace and the "<scope>:" prefix
    lines = [line.rstrip().replace(projectTag+":", "") for line in queryResult.readlines()]

    # Turn the glob-style pattern into a regex. NOTE(review): dots and the
    # DSID are not regex-escaped, so '.' matches any character here.
    dsPattern = pattern.replace(defaultRecoTagFilteringPattern+"*", "").replace("*", ".*")
    dsRE = re.compile(dsPattern)
    # Real lists (not filter()) are required below: the results are
    # truth-tested, indexed and appended to, which a Python 3 filter object
    # does not support.
    datasets = [line for line in lines if dsRE.match(line)]
    if verbose:
        print(" Dataset regex: %s" % dsPattern)
        print(" All datasets: ")
        for ds in datasets:
            print(" %s" % ds)

    aodPattern = dsPattern.replace("AOD.*", r"\.merge\.AOD\..*")
    aodPattern += "("+'|'.join(simTags)+")?.*("+'|'.join(recoTags)+").*"
    aodRE = re.compile(aodPattern)
    aodDatasets = [ds for ds in datasets if aodRE.match(ds)]
    if verbose:
        print(" AOD regex: %s" % aodPattern)
        print(" AOD datasets: ")
        for ds in aodDatasets:
            print(" %s" % ds)

    # if there are no matching AOD datasets, fill a dummy string
    if not aodDatasets:
        aodDatasets.append(" N/A ")

    daodPattern = aodPattern.replace("AOD", format)
    daodPattern += "("+'|'.join(pTags)+")"
    daodRE = re.compile(daodPattern)
    daodDatasets = [ds for ds in datasets if daodRE.match(ds)]
    if verbose:
        print(" DAOD regex: %s" % daodPattern)
        print(" DAOD datasets:")
        for ds in daodDatasets:
            print(" %s" % ds)

    # if there are several matching the requested tags, pick the one with the tag mentioned first in the list
    if len(daodDatasets) > 1:
        print("Found more than one:")
        for ds in daodDatasets:
            print(" %s" % ds)
        for pTag in pTags:
            preferred = next((ds for ds in daodDatasets if pTag in ds), None)
            if preferred is not None:
                daodDatasets = [preferred]
                break # we're done after we've found the first
        print("Was the right one selected?")
        print(" %s" % daodDatasets[0])

    # if there are no matching DAOD datasets, fill a dummy string
    if not daodDatasets:
        daodDatasets.append(" N/A ")

    # check for non-dataset line in the output - they could be e.g. error messages
    knownDatasets = set(datasets)
    otherLines = [line for line in lines if line not in knownDatasets]
    if otherLines:
        print("Other lines were found in the query output - there may have been a problem:")
        for line in otherLines:
            print(line)

    return {"AOD": aodDatasets[0], "DAOD": daodDatasets[0]}
106 
def getAmiStatus(ds, verbose):
    """Return the AMI ``prodsysStatus`` of dataset *ds*.

    Runs ``ami show dataset info`` and greps for the ``prodsysStatus`` line.

    Args:
        ds: Dataset name passed to the ``ami`` command.
        verbose: If true, print the command and every matching output line.

    Returns:
        The text after the first ':' of the single matching line, stripped of
        surrounding whitespace. ``"N/A"`` is returned (after printing a
        message) when there is no matching line, more than one, or the line
        contains no ':' separator.
    """
    cmd = "ami show dataset info %s | grep prodsysStatus" % ds
    amiResult = runCommand(cmd, verbose)
    lines = [line.rstrip() for line in amiResult.readlines()]
    if verbose:
        for line in lines:
            print(line)

    if not lines:
        print("No AMI status for %s" % ds)
        return "N/A"

    # Exactly one "key : value" line is expected; anything else (several
    # lines, or a line without a ':') is reported and stored as N/A instead
    # of raising IndexError.
    fields = lines[0].split(':')
    if len(lines) > 1 or len(fields) < 2:
        print("Weird AMI status for %s (saving N/A):" % ds)
        print(lines)
        return "N/A"

    return fields[1].strip()
124 
def printSamplesDict(s):
    """Print a table with one row per DSID and its AOD/DAOD status.

    Args:
        s: Dict mapping a DSID to a dict with the keys ``"AOD"``, ``"DAOD"``,
            ``"AODstatus"`` and ``"DAODstatus"``.

    The last column shows the DAOD dataset name; when that is the placeholder
    ``" N/A "``, the AOD dataset name is shown instead as ``(AOD: <name>)``.
    """
    row_format ="{0:<15}{1:^30}{2:^30}{3:<120}"
    print(row_format.format("=== DSID ===", "=== AOD ===", "=== DAOD (AOD) ===", "=== DAOD dataset name ==="))
    for dsid, info in s.items():
        dsName = info["DAOD"]
        if dsName == " N/A ":
            # no derivation found: fall back to showing the AOD name
            dsName = "(AOD: " + info["AOD"] + ")"
        print(row_format.format(dsid, info["AODstatus"], info["DAODstatus"], dsName))
133 
def main():
    """Command-line entry point: check the status of the datasets for a list of DSIDs.

    With ``-i`` a previously pickled result dict is loaded and printed, and
    the program exits. Otherwise the DSIDs are read from the ``-d`` text file
    (one per line; blank lines and lines starting with '#' are skipped), the
    AOD/DAOD datasets are looked up with rucio, their status is queried from
    AMI, and the result is printed and optionally pickled to ``-o``.
    """
    parser = argparse.ArgumentParser(description='Check status of datasets based on DSIDs and AMI tags. Written by C. Ohm & M. Tripiana')
    parser.add_argument('-p', '--projectTag', type=str, nargs='?', help='Project tag, defaults to "mc15_13TeV"', default='mc15_13TeV')
    parser.add_argument('-d', '--dsids', type=str, nargs='?', help='Text file(s) containing DSIDs', default='')
    parser.add_argument('-f', '--format', type=str, help='Format: DAOD_SUSY1 (default), DAOD_SUSY2, ...', default='DAOD_SUSY1')
    parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Verbose mode, more detailed output and commands, etc')
    # pickle files are opened in binary mode so that dump/load work on Python 3 as well
    parser.add_argument('-o', '--outfile', nargs='?', type=argparse.FileType('wb'), help='Save the dict holding the sample info to a pickle file')
    parser.add_argument('-i', '--infile', nargs='?', type=argparse.FileType('rb'), help='Open a pickle file containing a dict from a previous session')

    args = parser.parse_args()
    if args.verbose:
        print(args)

    if args.infile:
        # NOTE(review): unpickling can execute arbitrary code -- only load
        # files written by a previous session of this script.
        with args.infile as handle:
            samples = pickle.load(handle)
        printSamplesDict(samples)
        sys.exit()

    if not args.dsids:
        parser.print_help()
        sys.exit()

    # Strip surrounding whitespace up front so that indented comments and
    # whitespace-only lines are skipped, and close the file when done.
    with open(args.dsids) as f:
        dsids = [line.strip() for line in f]

    samples = {}

    for dsid in dsids:
        if not dsid or dsid.startswith('#'):
            continue
        # printed unconditionally, not only in verbose mode
        print("Checking DSID %s..." % dsid)
        samples[dsid] = getSamplesFromPattern(dsid, args.projectTag, defaultSimTags, defaultRecoTags, args.verbose, args.format, defaultDerivationTags)

    if args.verbose:
        print("These samples were found:")
        for dsid in samples:
            print("DSID: %s" % dsid)
            for ds in samples[dsid]:
                print(" %s: %s" % (ds, samples[dsid][ds]))

    # now check the status of each according to AMI:
    for dsid in samples:
        # Iterate over a snapshot of the keys: "<key>status" entries are added
        # to the same dict inside the loop.
        for ds in list(samples[dsid].keys()):
            dsName = samples[dsid][ds]
            if dsName == " N/A ":
                # no dataset was found, so there is nothing to ask AMI about
                samples[dsid][ds+"status"] = " N/A "
            else:
                samples[dsid][ds+"status"] = getAmiStatus(dsName, args.verbose)

    print("Done, here is the status of your samples")
    if args.verbose:
        print("Here's the dict holding all the extracted info:")
        print(samples)

    if args.outfile:
        with args.outfile as handle:
            pickle.dump(samples, handle)
        print("Python dict with sample info saved to %s - you can read it in with the -i option" % args.outfile.name)

    printSamplesDict(samples)
196 
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
replace
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition: hcg.cxx:307
CheckSampleStatus.main
def main()
Definition: CheckSampleStatus.py:134
covarianceTool.filter
filter
Definition: covarianceTool.py:514
fillPileUpNoiseLumi.next
next
Definition: fillPileUpNoiseLumi.py:52
CheckSampleStatus.getAmiStatus
def getAmiStatus(ds, verbose)
Definition: CheckSampleStatus.py:107
CheckSampleStatus.getSamplesFromPattern
def getSamplesFromPattern(dsid, projectTag="mc15_13TeV", simTags=defaultSimTags, recoTags=defaultRecoTags, verbose=False, format="DAOD_SUSY1", pTags=[], derivation="")
Definition: CheckSampleStatus.py:31
CheckSampleStatus.runCommand
def runCommand(cmd, verbose=False)
Definition: CheckSampleStatus.py:25
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
CheckSampleStatus.printSamplesDict
def printSamplesDict(s)
Definition: CheckSampleStatus.py:125
Trk::open
@ open
Definition: BinningType.h:40
python.Bindings.keys
keys
Definition: Control/AthenaPython/python/Bindings.py:798
Trk::split
@ split
Definition: LayerMaterialProperties.h:38