# NOTE(review): this chunk is a garbled line-wrapped extraction of a Python
# script -- the original file's line numbers (68, 71, ...) are fused into the
# text and many original lines are elided (numbering gaps).  Comments describe
# intent only; the code text itself is left untouched.
# Configure the root logger to print "LEVEL:message".
68 logging.basicConfig(format=
'%(levelname)s:%(message)s')
# Timezone support, used below to express "now" in CERN time.
71 from pytz
import timezone
# ATLAS AMI metadata API, aliased as AtlasAPI.
76 import pyAMI.atlas.api
as AtlasAPI
# Presumably inside an elided try/except around the pyAMI imports
# (original lines 77-78 missing) -- tells the user how to set up pyAMI.
79 logging.error(
"Unable to find pyAMI client. Please try this command first: lsetup pyAMI")
# Defaults for extra (non-AMI-table) fields, e.g. the keyword_xxx booleans
# added later from --fields.
83 extraFieldDefaults = {}
# Seed the dataset-table field defaults; the two integer identifiers
# default to 0, everything else is filled in below.
85 fieldDefaults = {
"subprocessID":0,
"dataset_number":0}
# Default every remaining column of the AMI 'datasets' table to None,
# skipping: cross_section (served via the physics parameters instead),
# fields already seeded above, and internal '@'-prefixed columns.
87 for field
in pyAMI.config.tables[
'datasets'].
keys():
88 if str(field) ==
"cross_section":
continue
89 if str(field)
in fieldDefaults.keys():
continue
90 if str(field).startswith(
"@"):
continue
91 fieldDefaults[
str(field)] =
None
# Verify a valid VOMS grid proxy exists before contacting AMI; the status
# test that triggers the error below is elided (original line 96 missing).
95 status,out = subprocess.getstatusoutput(
"voms-proxy-info -fqan -exists")
97 logging.error(
"Please renew your certificate with this command: voms-proxy-init -voms atlas")
# Open the pyAMI session against the 'atlas' catalogue; the error below is
# presumably in an elided except branch (original lines 102-103 missing).
101 client = pyAMI.client.Client(
'atlas')
104 logging.error(
"Could not establish pyAMI session. Are you sure you have a valid certificate? Do: voms-proxy-init -voms atlas")
# Fetch the catalogue of known physics parameters and build three lookups
# (containers presumably initialized in elided lines):
#   paramExplains - human-readable help strings for --fields help text,
#   paramUnits    - units per parameter name,
#   paramDefaults - default value per parameter (None when HASDEFAULT=='N').
114 res = client.execute(
'ListPhysicsParameterDefs',format=
'dom_object')
115 for r
in res.get_rows() :
# Help line starts as "name: description"; units/default appended below.
116 explainString =
"%s: %s" % (r[
u'PARAMNAME'],r[
u'DESCRIPTION'])
117 if r[
u'UNITS']!=
u'NULL':
118 explainString +=
" (units: %s)" % r[
u'UNITS']
119 paramUnits[r[
u'PARAMNAME']] = r[
u'PARAMNAME']
120 if r[
u'HASDEFAULT']==
u'N' : paramDefaults[
str(r[
u'PARAMNAME'])] =
None
# Elided 'else:' presumably precedes line 122 (original line 121 missing):
# record the declared default, coerced to float for 'number' parameters
# and str for 'string' parameters.
122 explainString +=
" (default value = %s)" % r[
u'DEFAULTVALUE']
123 if r[
u'PARAMTYPE']==
u'number': paramDefaults[
str(r[
u'PARAMNAME'])] =
float(r[
u'DEFAULTVALUE'])
124 elif r[
u'PARAMTYPE']==
u'string': paramDefaults[
str(r[
u'PARAMNAME'])] =
str(r[
u'DEFAULTVALUE'])
125 paramExplains += [explainString]
# Synthesized parameter: crossSection converted to picobarn units
# (the x1000 conversion happens later when values are read).
127 paramDefaults[
"crossSection_pb"] =
None
128 paramUnits[
"crossSection_pb"] =
"pb"
129 paramExplains += [
"crossSection_pb: Same as crossSection except in pb units (units: pb)"]
# Default --timestamp value: "now" formatted in the 'UCT' zone (a pytz
# alias for UTC); matches the "Universal Central Time" wording in the
# --timestamp help string -- NOTE(review): confirm UTC is the intent.
131 cern_time = timezone(
'UCT')
132 current_time = datetime.datetime.fromtimestamp(time.time(),cern_time).strftime(
'%Y-%m-%d %H:%M:%S')
# Command-line interface.  RawTextHelpFormatter preserves the embedded
# newlines in the long help strings below.
134 from argparse
import RawTextHelpFormatter
135 parser = argparse.ArgumentParser(description=__doc__,formatter_class=RawTextHelpFormatter)
# Datasets to query: either a list (--inDS) or a text file (--inDsTxt).
136 parser.add_argument(
'--inDS',nargs=
'+',default=[
""],help=
"List of datasets to retrieve parameters for")
137 parser.add_argument(
'--inDsTxt',default=
"",help=
"Alternative to --inDS, can specify the datasets from an input file")
# Fields to extract; the help text enumerates the physics parameters
# (paramExplains) and the AMI dataset-table fields built above.
138 parser.add_argument(
'--fields',nargs=
'+',help=
"List of parameters to extract. Available parameters are: \n\n %s\n\nYou can also include any from:\n %s\nYou can also do keyword_xxx to add a bool branch for keywords" % (
"\n ".
join(paramExplains),
", ".
join(
list(fieldDefaults.keys())+
list(extraFieldDefaults.keys()))),default=[
"dataset_number",
"crossSection",
"kFactor",
"genFiltEff"])
139 parser.add_argument(
'--timestamp',default=current_time,help=
"The timestamp to query parameters at, specified in Universal Central Time (UCT). If left blank, will take the current time")
# Physics groups tried in priority order when resolving a parameter value.
140 parser.add_argument(
'--physicsGroups',nargs=
'+',default=[
"PMG,MCGN"],help=
"Physics group from which to retrieve parameters, listed in order of priority (highest first). Default value is 'PMG,MCGN' (i.e. try to use PMG values, fallback on MCGN values if unavailable). Allowed groups are:\n PMG (this is the PMG's group name), BPHY, COSM, DAPR, EGAM, EXOT, FTAG, HIGG, HION, IDET, IDTR, JETM, LARG, MCGN (this is the AMI default group name), MDET, MUON, PHYS, REPR, SIMU, STDM, SUSY, TAUP, TCAL, TDAQ, THLT, TOPQ, TRIG, UPGR, VALI")
# When set, the tool runs in "diff" mode comparing two timestamps.
142 parser.add_argument(
'--oldTimestamp',default=
"",help=
"If specified, will instead display a diff between the old and new timestamp, showing explanation of any changed parameters")
144 parser.add_argument(
'--explainFields',nargs=
'+',default=[],help=
"The fields you would like explained .. will appear as comment lines after each row in the output")
145 parser.add_argument(
'--explainInfo',nargs=
'+',default=[],help=
"Properties of the parameter you want to show in the explanation. Can list from: explanation, insert_time, physicsGroup, createdby")
# Output destination (defaults to stdout) and column delimiter.
146 parser.add_argument(
'--outFile',default=sys.stdout,type=argparse.FileType(
'w'),help=
"Where to print the output to. Leave blank to print to stdout")
147 parser.add_argument(
'--delim',default=
"",help=
"The delimiter character. Defaults to spaces leading to nice formatting table")
148 parser.add_argument(
'-v',action=
'store_true',help=
"Verbose output for debugging")
150 args = parser.parse_args()
# -v switches the root logger to DEBUG; otherwise INFO.
152 if args.v: logging.getLogger().
setLevel(logging.DEBUG)
153 else: logging.getLogger().
setLevel(logging.INFO)
154 logging.debug(args.inDS)
155 logging.debug(args.fields)
156 logging.debug(args.timestamp)
# Easter-egg guard for a nonsense timestamp value.
158 if args.timestamp==
"the dawn of time":
159 logging.error(
"Unfortunately we don't know any parameters from this time period... but we're working on it!")
# Each nargs='+' option may also be given as a single comma-separated
# string; the sum(...,[]) trick flattens the per-token split lists, and
# the follow-up comprehension strips surrounding whitespace.
163 args.fields =
sum((y.split(
',')
for y
in args.fields),[])
164 args.fields = [x.strip()
for x
in args.fields]
# keyword_xxx fields become extra boolean columns; the binding of 'k'
# comes from elided original lines 169-170.
167 for f
in args.fields:
168 if f.startswith(
"keyword_"):
171 extraFieldDefaults[
"keyword_%s"%k]=
bool(
False)
175 args.physicsGroups =
sum((y.split(
',')
for y
in args.physicsGroups),[])
176 args.physicsGroups = [x.strip()
for x
in args.physicsGroups]
180 args.explainFields =
sum((y.split(
',')
for y
in args.explainFields),[])
181 args.explainFields = [x.strip()
for x
in args.explainFields]
182 args.explainInfo =
sum((y.split(
',')
for y
in args.explainInfo),[])
183 args.explainInfo = [x.strip()
for x
in args.explainInfo]
188 args.inDS =
sum((y.split(
',')
for y
in args.inDS),[])
189 args.inDS = [x.strip()
for x
in args.inDS]
# Partition the requested fields into three kinds:
#   dsFields    - regular AMI dataset-table columns (ldn/subprocessID are
#                 handled specially, so excluded),
#   extraFields - the keyword_xxx style extras,
#   paramFields - physics parameters resolved per physics group.
205 dsFields = [ x
for x
in args.fields
if x
in fieldDefaults.keys()
and x
not in [
"subprocessID",
"ldn"] ]
206 extraFields = [ x
for x
in args.fields
if x
in extraFieldDefaults.keys() ]
207 paramFields = [ x
for x
in args.fields
if x
in paramDefaults.keys() ]
# Physics parameters cannot be resolved without at least one group.
209 if len(paramFields)>0
and args.physicsGroups==[
""]:
210 logging.error(
"You must specify at least one physics group. See -h for allowed groups")
# Merge all defaults so every requested field has a lookup entry.
214 fieldDefaults.update(paramDefaults)
216 fieldDefaults.update(extraFieldDefaults)
# Reject any requested field that is still unknown.
218 for field
in args.fields:
219 if field
not in fieldDefaults:
220 logging.error(
"%s is not a recognised field. Allowed fields are:", field)
221 logging.error(fieldDefaults.keys())
# Diff mode: every field is explained, with the full set of properties.
225 if args.oldTimestamp!=
"":
226 logging.info(
"oldTimestamp option specified. Running in diff mode...")
227 args.explainFields = args.fields
228 args.explainInfo = [
"explanation",
"insert_time",
"physicsGroup",
"createdby"]
# Normalize dataset patterns: shell-style '*' -> SQL '%' wildcard, and
# drop any trailing container '/'.
232 args.inDS = [ds.replace(
"*",
"%")
for ds
in args.inDS]
233 args.inDS = [ds.rstrip(
"/")
for ds
in args.inDS]
235 if len(args.inDS)==0
or (len(args.inDS)==1
and args.inDS[0]==
""):
236 logging.error(
"No datasets provided. Please specify datasets with the --inDS or --inDsTxt options")
239 logging.info(
"Fetching list of datasets from AMI (this may take a few minutes)...")
# Query AMI for all VALID datasets matching the patterns, fetching the
# regular table fields plus the logical dataset name ('ldn').
243 res = AtlasAPI.list_datasets(client,patterns=args.inDS,fields=dsFields+[
'ldn'],ami_status=
"VALID")
245 logging.info(
"...Found %d datasets matching your selection", len(res))
# dataset_values: ldn -> {field: value} for the table fields; the loop
# header over the listing results 'r' is elided (original lines 253-254).
252 dataset_values = dict()
255 dataset_values[
str(r[
'ldn'])] = mydict
# Copy only the requested fields, skipping the ldn key itself.
256 for field
in r.items():
257 if str(field[0]) ==
"ldn":
continue
258 if str(field[0])
not in args.fields:
continue
259 mydict[
str(field[0])] =
str(field[1])
# Extra fields and keywords require a per-dataset info query.
261 if len(extraFields)>0
or len(args.keywords)>0:
262 info_res = AtlasAPI.get_dataset_info(client,
str(r[
'ldn']))
265 logging.error(
"Unable to retrieve dataset info for %s", r[
'ldn'])
267 for field
in extraFields:
269 if field.startswith(
"keyword_"):
continue
# keyword_xxx columns: 1 when the keyword appears in the dataset's
# comma-separated keyword list, else 0.  NOTE(review): 'unicode' here
# implies Python 2 heritage -- confirm before porting.
271 for k
in args.keywords:
272 mydict[
"keyword_%s" % k] =
int( (k
in str(info_res[0][
unicode(
'keyword')]).
split(
",")) )
# Re-order dataset_values to match the user's requested order (first pass
# over the requested list -- its loop header is elided), then append any
# remaining datasets.
275 from collections
import OrderedDict
276 sorted_values = OrderedDict()
278 if ds
in dataset_values.keys():
279 sorted_values[ds] = dataset_values[ds]
281 for ds
in dataset_values.keys():
282 if ds
not in sorted_values.keys():
283 sorted_values[ds] = dataset_values[ds]
284 dataset_values = sorted_values
286 logging.debug(dataset_values)
# Warn about non-wildcard patterns that matched nothing.
291 if '%' not in ds
and ds
not in dataset_values.keys():
292 logging.warning(
"Unknown dataset: %s", ds)
# Comma-joined ldn list passed to the parameter queries below.
294 datasetsToQuery =
",".
join(dataset_values.keys())
# When --inDsTxt was used, rebuild the ordered mapping following the text
# file line-by-line, preserving comment/blank lines as 'comment%d' entries
# so they can be echoed into the output.
297 complete_values = OrderedDict()
298 if args.inDsTxt !=
"":
302 txt =
open(args.inDsTxt)
# Strip the newline and surrounding whitespace from each line.
305 tmpLine = re.sub(
'\n',
'',tmpLine)
307 tmpLine = tmpLine.strip()
309 if tmpLine.startswith(
'#')
or tmpLine ==
'':
310 complete_values[
'comment%d'%(commentcount)] = tmpLine
311 commentcount = commentcount+1
# Container names may end in '/'; drop it before lookup.
314 tmpLine = tmpLine.rstrip(
"/")
315 if tmpLine
in dataset_values.keys():
316 complete_values[tmpLine] = dataset_values[tmpLine]
318 print(
"cannot find %s" % tmpLine)
321 dataset_values = complete_values
323 logging.info(
"Obtaining %s for selected datasets at timestamp=%s... (please be patient)",
324 args.fields, args.timestamp)
# Query physics parameters for all selected datasets in one call.  At the
# current time no history is needed; an elided 'else:' (original line 329)
# presumably selects the --history=true variant for past timestamps.
327 if(args.timestamp==current_time):
328 res = client.execute([
'GetPhysicsParamsForDataset',
"--logicalDatasetName=%s"% datasetsToQuery,
"--timestamp='%s'"%args.timestamp], format=
'dom_object')
330 res = client.execute([
'GetPhysicsParamsForDataset',
"--logicalDatasetName=%s"% datasetsToQuery,
"--timestamp='%s'"%args.timestamp,
"--history=true"], format=
'dom_object')
# Bucket the result rows by logical dataset name.
333 parameterQueryResults = dict()
334 for r
in res.get_rows():
335 if r[
u'logicalDatasetName']
not in parameterQueryResults.keys():
336 parameterQueryResults[r[
u'logicalDatasetName']] = []
337 parameterQueryResults[r[
u'logicalDatasetName']] += [r]
# Diff mode: repeat the query (with history) at the old timestamp and
# bucket those rows the same way.
340 if args.oldTimestamp!=
"" :
341 logging.info(
"Obtaining %s for selected datasets at timestamp=%s... (please be patient)",
342 args.fields,args.oldTimestamp)
343 res2 = client.execute([
'GetPhysicsParamsForDataset',
"--logicalDatasetName=%s"% datasetsToQuery,
"--timestamp='%s'"%args.oldTimestamp,
"--history=true"], format=
'dom_object')
344 old_parameterQueryResults = dict()
345 for r
in res2.get_rows():
346 if r[
u'logicalDatasetName']
not in old_parameterQueryResults.keys():
347 old_parameterQueryResults[r[
u'logicalDatasetName']] = []
348 old_parameterQueryResults[r[
u'logicalDatasetName']] += [r]
# Main loop: for each dataset (in display order), resolve every requested
# physics parameter to the value from the highest-priority physics group.
359 for ds
in dataset_values.keys():
# 'comment%d' entries from the --inDsTxt file are cached verbatim and
# echoed with the next real output row.
360 if ds.startswith(
'comment'):
361 if commentCount > 0 : commentCache +=
"\n"
362 commentCache += dataset_values[ds]
363 commentCount=commentCount+1
370 res = parameterQueryResults.get(ds,[])
371 if args.oldTimestamp!=
"": res2 = old_parameterQueryResults.get(ds,[])
# Collect the distinct subprocess IDs present in this dataset's rows
# (the enclosing row loop header is elided).
376 sp =
int(r[
u'subprocessID'])
377 if sp
not in dsSubprocesses: dsSubprocesses += [sp]
381 for sp
in dsSubprocesses:
# groupsWithVals[param]: (group, value) pairs from groups NOT in the
# requested priority list -- reported later if the value is missing.
384 groupsWithVals = dict()
387 for i
in args.explainFields: explainInfo[i] = dict()
389 for param
in paramFields:
390 groupsWithVals[param] = []
# bestGroupIndex tracks the priority rank of the group whose value is
# currently held; start past the end so any listed group wins.
391 bestGroupIndex = len(args.physicsGroups)
393 paramVals[param] = copy.copy(fieldDefaults[param])
# Per-row filter (row loop header elided): wrong subprocess or wrong
# parameter name -> skip; crossSection_pb is fed by crossSection rows.
395 if int(r[
u'subprocessID']) != sp:
continue
396 if str(r[
u'paramName']) != param
and not (param==
"crossSection_pb" and str(r[
u'paramName'])==
"crossSection"):
continue
397 if str(r[
u'physicsGroup'])
not in args.physicsGroups:
398 groupsWithVals[param] += [(
str(r[
u'physicsGroup']),
str(r[
u'paramValue']))]
# Keep only values from a strictly better (or equal, with warning)
# priority group than what we already have.
400 if args.physicsGroups.index(
str(r[
u'physicsGroup'])) > bestGroupIndex :
continue
401 if args.physicsGroups.index(
str(r[
u'physicsGroup'])) == bestGroupIndex : logging.warning(
"Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!", param, r[
u'physicsGroup'], ds, sp)
402 paramVals[param] =
str(r[
u'paramValue'])
# crossSection_pb: x1000 conversion -- presumably the stored
# crossSection is in nb, so this yields pb (confirm against AMI docs).
403 if param==
"crossSection_pb": paramVals[param] =
str(
float(paramVals[param])*1000.0)
404 bestGroupIndex=args.physicsGroups.index(
str(r[
u'physicsGroup']))
# Record requested explanation properties; unknown ones are errors.
406 for e
in args.explainInfo:
408 logging.error(
"Unrecognised explainInfo field: %s", e)
# Diff mode: repeat the identical resolution against the old-timestamp
# rows, filling paramVals2 instead of paramVals.
411 if args.oldTimestamp!=
"":
412 bestGroupIndex = len(args.physicsGroups)
413 paramVals2[param] = copy.copy(fieldDefaults[param])
415 if int(r[
u'subprocessID']) != sp:
continue
416 if str(r[
u'paramName']) != param
and not (param==
"crossSection_pb" and str(r[
u'paramName'])==
"crossSection"):
continue
417 if str(r[
u'physicsGroup'])
not in args.physicsGroups:
continue
418 if args.physicsGroups.index(
str(r[
u'physicsGroup'])) > bestGroupIndex :
continue
419 if args.physicsGroups.index(
str(r[
u'physicsGroup'])) == bestGroupIndex : logging.warning(
"Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!", param, r[
u'physicsGroup'], ds, sp)
420 paramVals2[param] =
str(r[
u'paramValue'])
421 if param==
"crossSection_pb": paramVals2[param] =
str(
float(paramVals2[param])*1000.0)
422 bestGroupIndex=args.physicsGroups.index(
str(r[
u'physicsGroup']))
# Assemble the output value for each requested field of this
# (dataset, subprocess) pair: ldn and subprocessID are positional, table
# fields come from dataset_values, everything else from paramVals.
428 for param
in args.fields:
430 if param ==
"ldn": val = ds
431 elif param ==
"subprocessID": val = sp
432 elif param
in dataset_values[ds].
keys(): val = dataset_values[ds][param]
433 else: val = paramVals.get(param,
None)
# Missing value with no default: warn (file output only), and list any
# groups outside the priority list that DID define the parameter.
435 if args.outFile != sys.stdout: logging.warning(
"dataset %s (subprocess %d) does not have parameter %s, which has no default.",ds,sp,param)
436 if len(groupsWithVals.get(param,[]))>0:
437 logging.warning(
"The follow physicsGroups have defined that parameter though:")
438 logging.warning(groupsWithVals[param])
# Diff mode: compute the old value (val2) the same way and print a
# "old ---> new" line plus the explanation properties when it differs.
442 if args.oldTimestamp!=
"":
445 if param ==
"ldn": val2 = ds
446 elif param ==
"subprocessID": val2 = sp
447 elif param
in dataset_values[ds].
keys(): val2 = dataset_values[ds][param]
448 else: val2 = paramVals2.get(param,
None)
449 if val2
is None: val2 =
"#UNKNOWN#"
# Print the dataset header once per dataset.
452 if not firstPrint:
print(
"%s:" % ds)
454 print(
" %s : %s ---> %s" % (param,
str(val2),
str(val)))
455 print(
" insert_time : %s" % explainInfo[param][
'insert_time'])
456 print(
" explanation : %s" % explainInfo[param][
'explanation'])
457 print(
" createdby : %s" % explainInfo[param][
'createdby'])
458 print(
" physicsGroup : %s" % explainInfo[param][
'physicsGroup'])
# Normal mode: accumulate the row both as a list (for the aligned table)
# and as a delimited string.
461 rowList += [
str(val)]
462 if rowString !=
"" and args.delim!=
"": rowString += args.delim
463 rowString +=
str(val)
# Header: column name plus, for file output, a ROOT-style branch type
# suffix derived from the field's default-value type (/O bool, /I int,
# /D float, /C string).
466 headerString += param
467 if args.outFile != sys.stdout:
468 if type(fieldDefaults[param])==bool: headerString +=
"/O:"
469 elif type(fieldDefaults[param])==int: headerString +=
"/I:"
470 elif type(fieldDefaults[param])==float: headerString +=
"/D:"
473 else: headerString +=
"/C:"
# Append units to the header/table-header where known.
476 if param
in paramUnits:
477 headerString +=
" [%s]" % paramUnits[param]
478 v +=
" [%s]" % paramUnits[param]
# Diff mode produces no table rows; skip the row output below.
481 if args.oldTimestamp!=
"":
continue
# Emit the header (once -- the guard is elided) when writing to a file;
# the trailing ':' from the last /X: suffix is dropped.
484 if args.outFile!=sys.stdout:
print(headerString[:-1],file=args.outFile)
# Flush any cached comment lines: straight to file when a delimiter is
# set, otherwise into outputTable for later aligned formatting.
486 if args.outFile!=sys.stdout
and args.delim!=
"":
print(commentCache,file=args.outFile)
487 outputTable += [[
"COMMENT",commentCache]]
490 if args.outFile != sys.stdout
and args.delim!=
"":
print(rowString,file=args.outFile)
491 outputTable += [rowList]
# Append '#field: { prop: value , ... }' explanation comment lines.
493 for (field,expl)
in explainInfo.items():
494 outString =
"#%s: { " % field
496 for eField
in args.explainInfo:
497 if doneFirst: outString +=
" , "
498 if eField
not in expl.keys(): outString +=
" %s: <NONE .. value is default>"%eField
499 else: outString +=
"%s: %s" % (eField,expl[eField])
502 print(outString,file=args.outFile)
504 if args.oldTimestamp!=
"":
# Pretty-print path (stdout, or file without a delimiter): compute each
# column's width from headers and all non-comment rows, then print the
# space-padded table.
509 if args.outFile == sys.stdout
or args.delim==
"":
511 columnWidths = [0]*len(args.fields)
512 for i
in range(0,len(tableHeaders)):
513 columnWidths[i] = len(tableHeaders[i])
514 for r
in outputTable:
515 if len(r)>0
and r[0]==
"COMMENT":
continue
516 for i
in range(0,len(r)):
517 if len(r[i])>columnWidths[i]: columnWidths[i]=len(r[i])
519 for i
in range(0,len(tableHeaders)):
520 lineout += tableHeaders[i].ljust(columnWidths[i]) +
" "
522 for r
in outputTable:
# Comment rows are printed verbatim; data rows are column-padded.
524 if len(r)>0
and r[0]==
"COMMENT": lineout = r[1]
526 for i
in range(0,len(r)):
527 lineout += r[i].ljust(columnWidths[i]) +
" "
528 print(lineout,file=args.outFile)
# When writing to a file, append a reproducibility footer: the asetup/pyAMI
# environment and the exact getMetadata.py command that regenerates this
# output (wildcard patterns excluded via the comment filter).
532 if args.outFile != sys.stdout:
534 datasetss = [x
for x
in dataset_values.keys()
if not x.startswith(
"comment")]
536 print(
"",file=args.outFile)
537 print(
"#lsetup \"asetup %s,%s\" pyAMI" % (os.environ.get(
'AtlasProject',
'UNKNOWN!'),os.environ.get(
'AtlasVersion',
'UNKNOWN!')),file=args.outFile)
538 print(
"#getMetadata.py --timestamp=\"%s\" --physicsGroups=\"%s\" --fields=\"%s\" --inDS=\"%s\"" % (args.timestamp,
",".
join(args.physicsGroups),
",".
join(args.fields),
",".
join(datasetss)),file=args.outFile )
539 logging.info(
"Results written to: %s", args.outFile.name)