# Configure the root logger early so every message prints as "LEVEL:message".
logging.basicConfig(format='%(levelname)s:%(message)s')

# pytz supplies the timezone object used to build the default --timestamp value.
from pytz import timezone

# pyAMI is the ATLAS metadata-catalogue client; it is only importable after
# "lsetup pyAMI" in an ATLAS software environment.
# NOTE(review): the try/except wrapping this import is outside this excerpt
# (original lines 65-68/70-71 missing); the error below is the handler's body.
import pyAMI.atlas.api as AtlasAPI

logging.error("Unable to find pyAMI client. Please try this command first: lsetup pyAMI")
# Defaults for the "extra" fields served later by AtlasAPI.get_dataset_info
# (none are known up front; keyword_xxx entries get added on demand).
extraFieldDefaults = {}

# Defaults for the regular dataset-catalogue fields: seed the two
# integer-valued ones, then fill the rest from the pyAMI table schema below.
fieldDefaults = {"subprocessID": 0, "dataset_number": 0}

for schema_field in pyAMI.config.tables['datasets'].keys():
    key = str(schema_field)
    # Skip cross_section (handled via the physics-parameter machinery),
    # keys already seeded above, and "@..." internal AMI attributes.
    if key == "cross_section" or key in fieldDefaults.keys() or key.startswith("@"):
        continue
    fieldDefaults[key] = None
# Check that the user holds a valid VOMS grid proxy; a non-zero exit status
# means it is missing or expired.
status,out = subprocess.getstatusoutput("voms-proxy-info -fqan -exists")

# NOTE(review): the "if status != 0:" guard and the program-exit lines around
# this message are outside this excerpt (original lines 89/91-93 missing).
logging.error("Please renew your certificate with this command: voms-proxy-init -voms atlas")

# Open a pyAMI session against the 'atlas' catalogue.
# NOTE(review): the try/except around this call is outside this excerpt
# (original lines 95-96 missing); the error below is the handler's body.
client = pyAMI.client.Client('atlas')

logging.error("Could not establish pyAMI session. Are you sure you have a valid certificate? Do: voms-proxy-init -voms atlas")
# Fetch the catalogue of physics parameters (crossSection, kFactor, ...) and
# build per parameter: a human-readable explanation line, its units, and a
# default value when AMI declares one.
# NOTE(review): the initialisations of paramExplains/paramDefaults/paramUnits
# are outside this excerpt (original lines ~99-106 missing).
res = client.execute('ListPhysicsParameterDefs',format='dom_object')
for r in res.get_rows() :
    # Start the explanation as "<name>: <description>".
    explainString = "%s: %s" % (r[u'PARAMNAME'],r[u'DESCRIPTION'])
    if r[u'UNITS']!=u'NULL':
        explainString += " (units: %s)" % r[u'UNITS']
        # Remember the units so they can decorate column headers later.
        paramUnits[r[u'PARAMNAME']] = r[u'UNITS']
    # No default declared in AMI -> default to None.
    if r[u'HASDEFAULT']==u'N' : paramDefaults[str(r[u'PARAMNAME'])] = None
    # NOTE(review): the "else :" introducing the has-default branch (original
    # line 114) is outside this excerpt; the three statements below belong to
    # that branch — indentation here is reconstructed.
    explainString += " (default value = %s)" % r[u'DEFAULTVALUE']
    if r[u'PARAMTYPE']==u'number': paramDefaults[str(r[u'PARAMNAME'])] = float(r[u'DEFAULTVALUE'])
    elif r[u'PARAMTYPE']==u'string': paramDefaults[str(r[u'PARAMNAME'])] = str(r[u'DEFAULTVALUE'])
    paramExplains += [explainString]
# crossSection_pb is a derived parameter: crossSection converted to pb.
pb_param = "crossSection_pb"
paramDefaults[pb_param] = None
paramUnits[pb_param] = "pb"
paramExplains.append("crossSection_pb: Same as crossSection except in pb units (units: pb)")

# Default --timestamp value: the current time rendered in the 'UCT' zone in
# the format the AMI parameter queries expect.
cern_time = timezone('UCT')
now = time.time()
current_time = datetime.datetime.fromtimestamp(now, cern_time).strftime('%Y-%m-%d %H:%M:%S')
# Command-line interface. RawTextHelpFormatter preserves the embedded
# newlines in the long help strings below.
from argparse import RawTextHelpFormatter

parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)

# Dynamic part of the --fields help: the known physics parameters plus the
# plain catalogue / extra fields collected above.
param_help = "\n ".join(paramExplains)
field_help = ", ".join(list(fieldDefaults.keys()) + list(extraFieldDefaults.keys()))

parser.add_argument('--inDS', nargs='+', default=[""],
                    help="List of datasets to retrieve parameters for")
parser.add_argument('--inDsTxt', default="",
                    help="Alternative to --inDS, can specify the datasets from an input file")
parser.add_argument('--fields', nargs='+',
                    help="List of parameters to extract. Available parameters are: \n\n %s\n\nYou can also include any from:\n %s\nYou can also do keyword_xxx to add a bool branch for keywords" % (param_help, field_help),
                    default=["dataset_number", "crossSection", "kFactor", "genFiltEff"])
parser.add_argument('--timestamp', default=current_time,
                    help="The timestamp to query parameters at, specified in Universal Central Time (UCT). If left blank, will take the current time")
parser.add_argument('--physicsGroups', nargs='+', default=["PMG,MCGN"],
                    help="Physics group from which to retrieve parameters, listed in order of priority (highest first). Default value is 'PMG,MCGN' (i.e. try to use PMG values, fallback on MCGN values if unavailable). Allowed groups are:\n PMG (this is the PMG's group name), BPHY, COSM, DAPR, EGAM, EXOT, FTAG, HIGG, HION, IDET, IDTR, JETM, LARG, MCGN (this is the AMI default group name), MDET, MUON, PHYS, REPR, SIMU, STDM, SUSY, TAUP, TCAL, TDAQ, THLT, TOPQ, TRIG, UPGR, VALI")
parser.add_argument('--oldTimestamp', default="",
                    help="If specified, will instead display a diff between the old and new timestamp, showing explanation of any changed parameters")
parser.add_argument('--explainFields', nargs='+', default=[],
                    help="The fields you would like explained .. will appear as comment lines after each row in the output")
parser.add_argument('--explainInfo', nargs='+', default=[],
                    help="Properties of the parameter you want to show in the explanation. Can list from: explanation, insert_time, physicsGroup, createdby")
parser.add_argument('--outFile', default=sys.stdout, type=argparse.FileType('w'),
                    help="Where to print the output to. Leave blank to print to stdout")
parser.add_argument('--delim', default="",
                    help="The delimiter character. Defaults to spaces leading to nice formatting table")
parser.add_argument('-v', action='store_true',
                    help="Verbose output for debugging")
args = parser.parse_args()

# -v switches the root logger to DEBUG; otherwise INFO.
if args.v: logging.getLogger().setLevel(logging.DEBUG)
else: logging.getLogger().setLevel(logging.INFO)
logging.debug(args.inDS)
logging.debug(args.fields)
logging.debug(args.timestamp)

# Easter egg for a nonsensical timestamp value.
# NOTE(review): the statement ending the program after this message is
# outside this excerpt (original lines 153-155 missing).
if args.timestamp=="the dawn of time":
    logging.error("Unfortunately we don't know any parameters from this time period... but we're working on it!")
# --fields entries may be comma-separated as well as space-separated:
# flatten on commas, then trim whitespace from every token.
args.fields = sum((y.split(',') for y in args.fields),[])
args.fields = [x.strip() for x in args.fields]

# Any requested keyword_xxx field becomes a boolean "extra" field.
for f in args.fields:
    if f.startswith("keyword_"):
        # NOTE(review): the lines deriving `k` (the keyword name after the
        # "keyword_" prefix) are outside this excerpt (original lines
        # 162-163 missing).
        extraFieldDefaults["keyword_%s"%k]= bool(False)
# The list-type options also accept comma-separated values: split every
# entry on commas and trim the resulting tokens (flatten + strip in one pass).
args.physicsGroups = [tok.strip() for entry in args.physicsGroups for tok in entry.split(',')]
args.explainFields = [tok.strip() for entry in args.explainFields for tok in entry.split(',')]
args.explainInfo = [tok.strip() for entry in args.explainInfo for tok in entry.split(',')]
args.inDS = [tok.strip() for entry in args.inDS for tok in entry.split(',')]
# Partition the requested fields by where their values come from:
#   dsFields    - plain dataset-catalogue fields (ldn/subprocessID excluded:
#                 they are filled specially per row)
#   extraFields - fields served by AtlasAPI.get_dataset_info
#   paramFields - physics parameters resolved per physics group
special_fields = ["subprocessID", "ldn"]
dsFields = [f for f in args.fields if f in fieldDefaults.keys() and f not in special_fields]
extraFields = [f for f in args.fields if f in extraFieldDefaults.keys()]
paramFields = [f for f in args.fields if f in paramDefaults.keys()]
# Physics parameters can only be resolved with at least one physics group.
# NOTE(review): the statement terminating the program after this error is
# outside this excerpt (original lines 204-206 missing).
if len(paramFields)>0 and args.physicsGroups==[""]:
    logging.error("You must specify at least one physics group. See -h for allowed groups")

# From here on a single defaults dict suffices: fold the physics-parameter
# and "extra" defaults into fieldDefaults.
fieldDefaults.update(paramDefaults)
fieldDefaults.update(extraFieldDefaults)
# Reject any requested field that is not a catalogue field, a physics
# parameter, or an extra/keyword field.
# NOTE(review): the statement terminating the program after these errors is
# outside this excerpt (original line 215 missing).
for field in args.fields:
    if field not in fieldDefaults:
        logging.error("%s is not a recognised field. Allowed fields are:", field)
        logging.error(fieldDefaults.keys())
# Diff mode: when --oldTimestamp is given, the script compares parameter
# values between the old and new timestamps instead of printing a table.
# Every requested field is explained, with full provenance information.
diff_mode = args.oldTimestamp != ""
if diff_mode:
    logging.info("oldTimestamp option specified. Running in diff mode...")
    args.explainFields = args.fields
    args.explainInfo = ["explanation", "insert_time", "physicsGroup", "createdby"]
# Normalise the dataset selectors: AMI uses '%' (not '*') as its wildcard,
# and container names must not keep a trailing '/'.
args.inDS = [name.replace("*", "%").rstrip("/") for name in args.inDS]
# Nothing to do when no datasets were given (--inDS defaults to one empty
# string).
# NOTE(review): the statement terminating the program after this error is
# outside this excerpt (original lines 230-231 missing).
if len(args.inDS)==0 or (len(args.inDS)==1 and args.inDS[0]==""):
    logging.error("No datasets provided. Please specify datasets with the --inDS or --inDsTxt options")

logging.info("Fetching list of datasets from AMI (this may take a few minutes)...")
# Resolve the (possibly wildcarded) selectors into concrete VALID datasets,
# fetching the plain catalogue fields in the same call.
res = AtlasAPI.list_datasets(client,patterns=args.inDS,fields=dsFields+['ldn'],ami_status="VALID")

logging.info("...Found %d datasets matching your selection", len(res))

# Map: dataset logical name (ldn) -> {field: value} for the plain fields.
dataset_values = dict()

# NOTE(review): the enclosing "for r in res:" loop header and the creation of
# `mydict` are outside this excerpt (original lines 246-247 missing); the
# statements below are that loop's body — indentation is reconstructed.
dataset_values[str(r['ldn'])] = mydict
for field in r.items():
    if str(field[0]) == "ldn": continue
    if str(field[0]) not in args.fields: continue
    mydict[str(field[0])] = str(field[1])

# Extra fields and keyword flags require a second, per-dataset AMI call.
if len(extraFields)>0 or len(args.keywords)>0:
    info_res = AtlasAPI.get_dataset_info(client, str(r['ldn']))
    # NOTE(review): the try/except around the call above is outside this
    # excerpt (original lines 256-257 missing); the error below is the
    # handler's body.
    logging.error("Unable to retrieve dataset info for %s", r['ldn'])
# Copy the requested extra fields out of the get_dataset_info result,
# converting numeric-looking values to float and falling back to the
# declared default otherwise.  `isfloat` is defined elsewhere in the file.
for field in extraFields:
    if field.startswith("keyword_"): continue
    mydict[field] = float(info_res[0][str(field)]) if isfloat(info_res[0][str(field)]) else extraFieldDefaults[field]

# keyword_xxx columns: 1 when xxx appears in the dataset's comma-separated
# keyword list, else 0.
for k in args.keywords:
    mydict["keyword_%s" % k] = int( (k in str(info_res[0][str('keyword')]).split(",")) )
# Re-impose the order in which the user listed the datasets: first copy over
# the explicitly-named ones, then append whatever else the AMI query
# returned.
from collections import OrderedDict
sorted_values = OrderedDict()

# NOTE(review): the loop header for this first pass (original lines 270-271
# area, presumably over the requested names) is outside this excerpt.
if ds in dataset_values.keys():
    sorted_values[ds] = dataset_values[ds]

for ds in dataset_values.keys():
    if ds not in sorted_values.keys():
        sorted_values[ds] = dataset_values[ds]

dataset_values = sorted_values

logging.debug(dataset_values)

# Warn about explicitly-named (non-wildcard) datasets AMI did not return.
# NOTE(review): the loop header over the requested names (original lines
# 280-283) is outside this excerpt.
if '%' not in ds and ds not in dataset_values.keys():
    logging.warning("Unknown dataset: %s", ds)

# Comma-separated ldn list used by the bulk parameter queries below.
datasetsToQuery = ",".join(dataset_values.keys())
# When --inDsTxt is used, rebuild the result map in the text file's order,
# carrying comment lines and blank lines through to the output.
complete_values = OrderedDict()
if args.inDsTxt != "":
    # NOTE(review): the comment-counter initialisation and the per-line loop
    # header are outside this excerpt (original lines 292-294/296-297
    # missing).  Also note: `txt` is never closed in this excerpt — consider
    # a with-block.
    txt = open(args.inDsTxt)

    # Strip the newline, then surrounding whitespace, from the raw line.
    tmpLine = re.sub('\n','',tmpLine)
    tmpLine = tmpLine.strip()

    # Comment/blank lines are preserved under synthetic 'comment<N>' keys.
    if tmpLine.startswith('#') or tmpLine == '':
        complete_values['comment%d'%(commentcount)] = tmpLine
        commentcount = commentcount+1

    # A container's trailing '/' is not part of the ldn.
    tmpLine = tmpLine.rstrip("/")
    if tmpLine in dataset_values.keys():
        complete_values[tmpLine] = dataset_values[tmpLine]
    # NOTE(review): the "else:" branch header (original line 310) is outside
    # this excerpt; this print reports names absent from the AMI results.
    print("cannot find %s" % tmpLine)

    dataset_values = complete_values
logging.info("Obtaining %s for selected datasets at timestamp=%s... (please be patient)",
             args.fields, args.timestamp)

# For the current timestamp a plain query suffices; any other timestamp
# needs --history=true so AMI also returns superseded values.
# NOTE(review): the "else:" separating the two queries (original line 322)
# is outside this excerpt, so as written the second call would always run.
if(args.timestamp==current_time):
    res = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% datasetsToQuery,"--timestamp='%s'"%args.timestamp], format='dom_object')
res = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% datasetsToQuery,"--timestamp='%s'"%args.timestamp,"--history=true"], format='dom_object')

# Group the returned parameter rows by dataset name.
parameterQueryResults = dict()
for r in res.get_rows():
    if r[u'logicalDatasetName'] not in parameterQueryResults.keys():
        parameterQueryResults[r[u'logicalDatasetName']] = []
    parameterQueryResults[r[u'logicalDatasetName']] += [r]

# Diff mode: run the same history query at the old timestamp as well.
if args.oldTimestamp!="" :
    logging.info("Obtaining %s for selected datasets at timestamp=%s... (please be patient)",
                 args.fields,args.oldTimestamp)
    res2 = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% datasetsToQuery,"--timestamp='%s'"%args.oldTimestamp,"--history=true"], format='dom_object')
    old_parameterQueryResults = dict()
    for r in res2.get_rows():
        if r[u'logicalDatasetName'] not in old_parameterQueryResults.keys():
            old_parameterQueryResults[r[u'logicalDatasetName']] = []
        old_parameterQueryResults[r[u'logicalDatasetName']] += [r]
# Main output loop over the datasets, in the established order.
# NOTE(review): several statements of this loop body (initialisations of
# commentCache/commentCount/dsSubprocesses, the "for r in res:" header) are
# outside this excerpt — indentation below is reconstructed.
for ds in dataset_values.keys():
    # 'comment<N>' keys are pass-through comment lines from --inDsTxt;
    # buffer them until the next real dataset row is emitted.
    if ds.startswith('comment'):
        if commentCount > 0 : commentCache += "\n"
        commentCache += dataset_values[ds]
        commentCount=commentCount+1

    # Parameter rows for this dataset at the new (and, in diff mode, old)
    # timestamp.
    res = parameterQueryResults.get(ds,[])
    if args.oldTimestamp!="": res2 = old_parameterQueryResults.get(ds,[])

    # Collect the distinct subprocess IDs appearing in the result rows.
    # NOTE(review): the enclosing "for r in res:" header is outside this
    # excerpt.
    sp = int(r[u'subprocessID'])
    if sp not in dsSubprocesses: dsSubprocesses += [sp]
# Resolve each requested physics parameter for each subprocess, honouring
# the physics-group priority order (lower index in args.physicsGroups =
# higher priority).
# NOTE(review): the "for r in res:" header of the inner row loop and some
# guard lines are outside this excerpt — indentation is reconstructed.
for sp in dsSubprocesses:
    # physicsGroup -> value pairs seen for groups NOT in the requested list
    # (reported when a parameter ends up unresolved).
    groupsWithVals = dict()
    for i in args.explainFields: explainInfo[i] = dict()
    for param in paramFields:
        groupsWithVals[param] = []
        # Index of the group that supplied the current value; start beyond
        # the end so any requested group wins the first time.
        bestGroupIndex = len(args.physicsGroups)
        paramVals[param] = copy.copy(fieldDefaults[param])
        # --- body of the missing "for r in res:" loop ---
        if int(r[u'subprocessID']) != sp: continue
        # crossSection_pb is served by the stored crossSection rows.
        if str(r[u'paramName']) != param and not (param=="crossSection_pb" and str(r[u'paramName'])=="crossSection"): continue
        if str(r[u'physicsGroup']) not in args.physicsGroups:
            groupsWithVals[param] += [(str(r[u'physicsGroup']),str(r[u'paramValue']))]
            # NOTE(review): the "continue" ending this branch (original line
            # 392) is outside this excerpt.
        if args.physicsGroups.index(str(r[u'physicsGroup'])) > bestGroupIndex : continue
        if args.physicsGroups.index(str(r[u'physicsGroup'])) == bestGroupIndex : logging.warning("Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!", param, r[u'physicsGroup'], ds, sp)
        paramVals[param] = str(r[u'paramValue'])
        # x1000 conversion: assumes the stored crossSection is in nb — TODO
        # confirm against the AMI schema.
        if param=="crossSection_pb": paramVals[param] = str(float(paramVals[param])*1000.0)
        bestGroupIndex=args.physicsGroups.index(str(r[u'physicsGroup']))
        # Record the requested provenance fields for the explanation output.
        for e in args.explainInfo:
            # NOTE(review): the membership check guarding unknown fields
            # (original line 400) is outside this excerpt; the error below is
            # that guard's body.
            logging.error("Unrecognised explainInfo field: %s", e)
            explainInfo[param][e]=str(r[str(e)])
# Diff mode: resolve the same parameter at the old timestamp (rows in res2)
# into paramVals2, applying the same group-priority rules as above.
# NOTE(review): the "for r in res2:" header of the inner loop is outside this
# excerpt — indentation is reconstructed.
if args.oldTimestamp!="":
    bestGroupIndex = len(args.physicsGroups)
    paramVals2[param] = copy.copy(fieldDefaults[param])
    # --- body of the missing "for r in res2:" loop ---
    if int(r[u'subprocessID']) != sp: continue
    if str(r[u'paramName']) != param and not (param=="crossSection_pb" and str(r[u'paramName'])=="crossSection"): continue
    if str(r[u'physicsGroup']) not in args.physicsGroups: continue
    if args.physicsGroups.index(str(r[u'physicsGroup'])) > bestGroupIndex : continue
    if args.physicsGroups.index(str(r[u'physicsGroup'])) == bestGroupIndex : logging.warning("Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!", param, r[u'physicsGroup'], ds, sp)
    paramVals2[param] = str(r[u'paramValue'])
    # x1000 conversion: assumes the stored crossSection is in nb — TODO
    # confirm against the AMI schema.
    if param=="crossSection_pb": paramVals2[param] = str(float(paramVals2[param])*1000.0)
    bestGroupIndex=args.physicsGroups.index(str(r[u'physicsGroup']))
# Assemble the output row for this (dataset, subprocess) pair.
# NOTE(review): several guard lines of this region are outside this excerpt —
# indentation is reconstructed.
for param in args.fields:
    # Value source, in order: special-cased ldn/subprocessID, then the plain
    # catalogue values, then the resolved physics parameters.
    if param == "ldn": val = ds
    elif param == "subprocessID": val = sp
    elif param in dataset_values[ds].keys(): val = dataset_values[ds][param]
    else: val = paramVals.get(param,None)
    # NOTE(review): the None-check guarding the warnings below is outside
    # this excerpt.
    if args.outFile != sys.stdout: logging.warning("dataset %s (subprocess %d) does not have parameter %s, which has no default.",ds,sp,param)
    if len(groupsWithVals.get(param,[]))>0:
        logging.warning("The follow physicsGroups have defined that parameter though:")
        logging.warning(groupsWithVals[param])
    # Diff mode: compute the old value the same way and print any change
    # together with its provenance.
    if args.oldTimestamp!="":
        if param == "ldn": val2 = ds
        elif param == "subprocessID": val2 = sp
        elif param in dataset_values[ds].keys(): val2 = dataset_values[ds][param]
        else: val2 = paramVals2.get(param,None)
        if val2 is None: val2 = "#UNKNOWN#"
        # NOTE(review): the comparison guarding this diff output, and the
        # setting of firstPrint, are outside this excerpt.
        if not firstPrint: print("%s:" % ds)
        print(" %s : %s ---> %s" % (param,str(val2),str(val)))
        print(" insert_time : %s" % explainInfo[param]['insert_time'])
        print(" explanation : %s" % explainInfo[param]['explanation'])
        print(" createdby : %s" % explainInfo[param]['createdby'])
        print(" physicsGroup : %s" % explainInfo[param]['physicsGroup'])
    # Accumulate the row in both list form (for aligned-table output) and
    # string form (for delimited output).
    rowList += [str(val)]
    if rowString != "" and args.delim!="": rowString += args.delim
    rowString += str(val)
# Build the header: for file output append per-type suffixes (looks like
# ROOT TTree branch-descriptor codes — TODO confirm).
# NOTE(review): guard lines around this region, the initialisation of
# headerString/v/doneFirst, and original lines 464-465/488/493-494 are
# outside this excerpt — indentation is reconstructed.
headerString += param
if args.outFile != sys.stdout:
    if type(fieldDefaults[param])==bool: headerString += "/O:"
    elif type(fieldDefaults[param])==int: headerString += "/I:"
    elif type(fieldDefaults[param])==float: headerString += "/D:"
    else: headerString += "/C:"
# Append units to the header / explanation entry when known.
if param in paramUnits:
    headerString += " [%s]" % paramUnits[param]
    # NOTE(review): `v` is defined in lines outside this excerpt.
    v += " [%s]" % paramUnits[param]

# Diff mode produces no table output: skip to the next dataset.
if args.oldTimestamp!="": continue

if args.outFile!=sys.stdout: print(headerString[:-1],file=args.outFile)

# Flush buffered --inDsTxt comments into the delimited output and table.
if args.outFile!=sys.stdout and args.delim!="": print(commentCache,file=args.outFile)
outputTable += [["COMMENT",commentCache]]

if args.outFile != sys.stdout and args.delim!="": print(rowString,file=args.outFile)
outputTable += [rowList]

# Emit the per-field explanation comment lines requested via
# --explainFields/--explainInfo.
for (field,expl) in explainInfo.items():
    outString = "#%s: { " % field
    for eField in args.explainInfo:
        if doneFirst: outString += " , "
        if eField not in expl.keys(): outString += " %s: <NONE .. value is default>"%eField
        else: outString += "%s: %s" % (eField,expl[eField])
    print(outString,file=args.outFile)
# Diff mode ends here; the aligned-table printing below is for normal mode.
# NOTE(review): the body of this diff-mode branch (original lines 498-501)
# and the initialisations of tableHeaders/lineout are outside this excerpt.
if args.oldTimestamp!="":

# Space-aligned table output: used for stdout, or file output without a
# delimiter.
if args.outFile == sys.stdout or args.delim=="":
    # Column width = widest cell (or header) in each column; COMMENT rows
    # are excluded from the measurement.
    columnWidths = [0]*len(args.fields)
    for i in range(0,len(tableHeaders)):
        columnWidths[i] = len(tableHeaders[i])
    for r in outputTable:
        if len(r)>0 and r[0]=="COMMENT": continue
        for i in range(0,len(r)):
            if len(r[i])>columnWidths[i]: columnWidths[i]=len(r[i])
    for i in range(0,len(tableHeaders)):
        lineout += tableHeaders[i].ljust(columnWidths[i]) + " "
    for r in outputTable:
        # COMMENT rows are printed verbatim instead of padded cells.
        if len(r)>0 and r[0]=="COMMENT": lineout = r[1]
        for i in range(0,len(r)):
            lineout += r[i].ljust(columnWidths[i]) + " "
        print(lineout,file=args.outFile)
# For file output, append a comment trailer recording the release
# environment and the exact command needed to reproduce this table.
# NOTE(review): original lines 526/528 are outside this excerpt.
if args.outFile != sys.stdout:
    # All real dataset keys (the synthetic 'comment<N>' entries are skipped).
    datasetss = [x for x in dataset_values.keys() if not x.startswith("comment")]

    print("",file=args.outFile)
    print("#lsetup \"asetup %s,%s\" pyAMI" % (os.environ.get('AtlasProject','UNKNOWN!'),os.environ.get('AtlasVersion','UNKNOWN!')),file=args.outFile)
    print("#getMetadata.py --timestamp=\"%s\" --physicsGroups=\"%s\" --fields=\"%s\" --inDS=\"%s\"" % (args.timestamp,",".join(args.physicsGroups),",".join(args.fields),",".join(datasetss)),file=args.outFile )
    logging.info("Results written to: %s", args.outFile.name)