ATLAS Offline Software
Loading...
Searching...
No Matches
CheckSampleStatus.py
Go to the documentation of this file.
1#!/usr/bin/env python
2
3# Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration
4
5
14
# Default AMI tags used for the sample status check.
# For MC15c, see https://twiki.cern.ch/twiki/bin/view/AtlasProtected/AtlasProductionGroupMC15c

# Appended to the rucio query pattern; the query hits are then filtered
# with the tag lists below.
defaultRecoTagFilteringPattern = "r7"

# MC15 standard simulation tags
defaultSimTags = [
    "s2608",
    "s2726",
]

# Reconstruction tags. This list is the variant meant for AF-II (it adds the
# a-tags); for r-tags only, combined with the s-tags, use ["r7772", "r7725"].
defaultRecoTags = [
    "r7772",
    "r7725",
    "a818",
    "a821",
]

# Derivation tags, in order of priority (in case several are available)
defaultDerivationTags = [
    "p2622",
    "p2623",
    "p2613",
    "p2614",
]
22
23import sys, os, argparse, subprocess, re, pickle
24
def runCommand(cmd, verbose=False):
    """Launch a shell command line and return a handle on its output.

    Args:
        cmd: Command line as a single string. It is handed to the shell
            unchanged (shell=True), so pipelines such as "a | b" work.
        verbose: When true, print the command before launching it.

    Returns:
        The child's stdout pipe with stderr merged into it. The pipe is
        opened in text mode so that it yields str lines on both Python 2
        and Python 3. The child is not waited for here; callers drain the
        pipe (e.g. with readlines()) to collect the output.
    """
    if verbose:
        # Single-argument parenthesised print behaves the same on Python 2 and 3.
        print(" Will run the following command: %s" % cmd)
    child = subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        # Without text mode Python 3 returns bytes, and the callers'
        # str.replace()/str.split() calls on the lines raise TypeError.
        universal_newlines=True,
    )
    return child.stdout
30
def getSamplesFromPattern(dsid,
                          projectTag = "mc15_13TeV",
                          simTags = defaultSimTags,
                          recoTags = defaultRecoTags,
                          verbose = False,
                          format = "DAOD_SUSY1",
                          pTags = [],
                          derivation = ""):
    """Look up the AOD and derived-AOD containers for one DSID via rucio.

    Args:
        dsid: Dataset ID, used verbatim in the rucio query and in the regexes.
        projectTag: Project prefix of the dataset names; a leading
            "<projectTag>:" scope is stripped from every rucio output line.
        simTags: Simulation tags accepted in AOD names (optional in the regex).
        recoTags: Reconstruction tags, one of which must appear in the name.
        verbose: Print the regexes and intermediate dataset lists.
        format: Derivation format name substituted for "AOD" to build the
            DAOD regex.
        pTags: Derivation tags in order of priority. The list is only read,
            never modified. An empty list yields an empty regex group, i.e.
            no restriction on the derivation tag.
        derivation: Unused; kept so existing callers keep working.

    Returns:
        A dict {"AOD": name, "DAOD": name} holding the first matching
        container of each kind, or the placeholder " N/A " when none matched.
    """
    pattern = projectTag + '.' + dsid + '.*AOD*'

    # do the rucio query
    queryPattern = pattern + defaultRecoTagFilteringPattern + "*"
    cmd = "rucio ls --short --filter type=CONTAINER %s | sort -r " % (queryPattern) # sort -r prioritizes higher e-tags
    queryResult = runCommand(cmd, verbose)

    # preprocess the output
    scope = projectTag + ":"
    lines = [line.rstrip().replace(scope, "") for line in queryResult.readlines()]

    # Turn the shell-style wildcard pattern into a regular expression.
    dsPattern = pattern.replace(defaultRecoTagFilteringPattern + "*", "").replace("*", ".*")
    dsRE = re.compile(dsPattern)
    # List comprehensions rather than filter(): the results are appended to,
    # measured and indexed below, which needs real lists on Python 3 as well.
    datasets = [line for line in lines if dsRE.match(line)]
    if verbose:
        print(" Dataset regex: %s" % dsPattern)
        print(" All datasets: ")
        for ds in datasets:
            print(" %s" % ds)

    # Raw string: the backslashes are regex escapes, not string escapes.
    aodPattern = dsPattern.replace("AOD.*", r"\.merge\.AOD\..*")
    aodPattern += "(" + '|'.join(simTags) + ")?.*(" + '|'.join(recoTags) + ").*"
    aodRE = re.compile(aodPattern)
    aodDatasets = [ds for ds in datasets if aodRE.match(ds)]
    if verbose:
        print(" AOD regex: %s" % aodPattern)
        print(" AOD datasets: ")
        for ds in aodDatasets:
            print(" %s" % ds)

    # if there are no matching AOD datasets, fill a dummy string
    if not aodDatasets:
        aodDatasets.append(" N/A ")

    daodPattern = aodPattern.replace("AOD", format)
    daodPattern += "(" + '|'.join(pTags) + ")"
    daodRE = re.compile(daodPattern)
    daodDatasets = [ds for ds in datasets if daodRE.match(ds)]
    if verbose:
        print(" DAOD regex: %s" % daodPattern)
        print(" DAOD datasets:")
        for ds in daodDatasets:
            print(" %s" % ds)

    # if there are several matching the requested tags, pick the one with the tag mentioned first in the list
    if len(daodDatasets) > 1:
        print("Found more than one:")
        for ds in daodDatasets:
            print(" %s" % ds)
        for pTag in pTags:
            preferred = next((ds for ds in daodDatasets if pTag in ds), None)
            if preferred is not None:
                daodDatasets = [preferred]
                break # we're done after we've found the first
        print("Was the right one selected?")
        print(" %s" % daodDatasets[0])

    # if there are no matching DAOD datasets, fill a dummy string
    if not daodDatasets:
        daodDatasets.append(" N/A ")

    # check for non-dataset line in the output - they could be e.g. error messages
    knownDatasets = set(datasets)
    otherLines = [line for line in lines if line not in knownDatasets]
    if otherLines:
        print("Other lines were found in the query output - there may have been a problem:")
        for line in otherLines:
            print(line)

    return {"AOD": aodDatasets[0], "DAOD": daodDatasets[0]}
106
def getAmiStatus(ds, verbose):
    """Ask AMI for the production-system status of one dataset.

    Runs "ami show dataset info <ds>" filtered on "prodsysStatus" and parses
    the single line that is expected back.

    Args:
        ds: Dataset name to query.
        verbose: Print the command and the raw output lines.

    Returns:
        The text between the first and second ':' of the status line,
        stripped of surrounding whitespace, or "N/A" when the output is
        empty, has more than one line, or cannot be parsed.
    """
    cmd = "ami show dataset info %s | grep prodsysStatus" % ds
    amiResult = runCommand(cmd, verbose)
    lines = [line.rstrip() for line in amiResult.readlines()]
    if verbose:
        for line in lines:
            print(line)

    if not lines:
        print("No AMI status for %s" % ds)
        return "N/A"

    if len(lines) == 1:
        fields = lines[0].split(':')
        # A lone line without ':' (e.g. a message rather than a status)
        # used to raise IndexError here; it is now reported like any other
        # unexpected output.
        if len(fields) > 1:
            return fields[1].strip()

    print("Weird AMI status for %s (saving N/A):" % ds)
    print(lines)
    return "N/A"
124
def printSamplesDict(s):
    """Print a fixed-width status table with one row per DSID.

    Args:
        s: Dict mapping each DSID to a dict with the keys "AOD", "DAOD",
            "AODstatus" and "DAODstatus".

    The last column shows the DAOD dataset name; when that is the " N/A "
    placeholder, the AOD name is shown instead as "(AOD: <name>)".
    """
    row_format = "{0:<15}{1:^30}{2:^30}{3:<120}"
    print(row_format.format("=== DSID ===", "=== AOD ===", "=== DAOD (AOD) ===", "=== DAOD dataset name ==="))
    for ds in s:
        entry = s[ds]
        dsName = entry["DAOD"]
        if dsName == " N/A ":
            # No derivation found: fall back to showing the AOD name.
            dsName = "(AOD: " + entry["AOD"] + ")"
        print(row_format.format(ds, entry["AODstatus"], entry["DAODstatus"], dsName))
133
def main():
    """Command-line entry point.

    With -i, load a previously pickled sample dict, print its status table
    and exit. Otherwise read DSIDs from the file given with -d (one per
    line; empty lines and lines starting with '#' are skipped), look up the
    AOD/DAOD containers for each DSID, query AMI for the status of every
    container found, optionally pickle the resulting dict (-o) and print
    the status table.
    """
    parser = argparse.ArgumentParser(description='Check status of datasets based on DSIDs and AMI tags. Written by C. Ohm & M. Tripiana')
    parser.add_argument('-p', '--projectTag', type=str, nargs='?', help='Project tag, defaults to "mc15_13TeV"', default='mc15_13TeV')
    parser.add_argument('-d', '--dsids', type=str, nargs='?', help='Text file(s) containing DSIDs', default='')
    parser.add_argument('-f', '--format', type=str, help='Format: DAOD_SUSY1 (default), DAOD_SUSY2, ...', default='DAOD_SUSY1')
    parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Verbose mode, more detailed output and commands, etc')
    # Pickle streams are binary data: open the files in binary mode so that
    # dump/load also work on Python 3.
    parser.add_argument('-o', '--outfile', nargs='?', type=argparse.FileType('wb'), help='Save the dict holding the sample info to a pickle file')
    parser.add_argument('-i', '--infile', nargs='?', type=argparse.FileType('rb'), help='Open a pickle file containing a dict from a previous session')

    args = parser.parse_args()
    if args.verbose:
        print(args)

    if args.infile:
        # NOTE: unpickling can execute arbitrary code - only load files
        # written by a previous run of this script.
        with args.infile as handle:
            samples = pickle.load(handle)
        printSamplesDict(samples)
        sys.exit()

    if not args.dsids:
        parser.print_help()
        sys.exit()

    # Strip each line once, so that the dict key and the queried DSID agree
    # and whitespace-only lines are skipped; the file is closed right away.
    with open(args.dsids) as f:
        dsids = [line.strip() for line in f]

    samples = {}

    for dsid in dsids:
        if not dsid or dsid.startswith('#'):
            continue
        print("Checking DSID %s..." % dsid)
        samples[dsid] = getSamplesFromPattern(dsid, args.projectTag, defaultSimTags, defaultRecoTags, args.verbose, args.format, defaultDerivationTags)

    if args.verbose:
        print("These samples were found:")
        for dsid in samples:
            print("DSID: %s" % dsid)
            for ds in samples[dsid]:
                print(" %s: %s" % (ds, samples[dsid][ds]))

    # now check the status of each according to AMI:
    for dsid in samples:
        info = samples[dsid]
        # Iterate over a snapshot of the keys: the "<key>status" entries are
        # added to the same dict inside the loop.
        for ds in list(info):
            if info[ds] == " N/A ":
                # Placeholder, not a dataset name: nothing to ask AMI about.
                info[ds + "status"] = " N/A "
            else:
                info[ds + "status"] = getAmiStatus(info[ds], args.verbose)

    print("Done, here is the status of your samples")
    if args.verbose:
        print("Here's the dict holding all the extracted info:")
        print(samples)

    if args.outfile:
        with args.outfile as handle:
            pickle.dump(samples, handle)
        print("Python dict with sample info saved to %s - you can read it in with the -i option" % args.outfile.name)

    printSamplesDict(samples)
196
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition hcg.cxx:310
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
runCommand(cmd, verbose=False)
getAmiStatus(ds, verbose)
getSamplesFromPattern(dsid, projectTag="mc15_13TeV", simTags=defaultSimTags, recoTags=defaultRecoTags, verbose=False, format="DAOD_SUSY1", pTags=[], derivation="")