logging.basicConfig(format='%(levelname)s:%(message)s')

import time,datetime
from pytz import timezone
import argparse

try:
    import pyAMI.client
    import pyAMI.atlas.api as AtlasAPI
    import pyAMI.config
except ImportError:
    logging.error("Unable to find pyAMI client. Please try this command first: lsetup pyAMI")
    return -1


extraFieldDefaults = {}

fieldDefaults = {"subprocessID":0,"dataset_number":0}
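# Seed the defaults with every dataset field known to pyAMI, except cross_section
# and the internal @-prefixed fields; these all default to None.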
for field in pyAMI.config.tables['datasets'].keys():
    if str(field) == "cross_section": continue
    if str(field) in fieldDefaults.keys(): continue
    if str(field).startswith("@"): continue
    fieldDefaults[str(field)] = None

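# AMI needs a valid VOMS proxy; check for one up front so the user gets a clear
# error message rather than a failed connection later.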
status,out = subprocess.getstatusoutput("voms-proxy-info -fqan -exists")
if status!=0:
    logging.error("Please renew your certificate with this command: voms-proxy-init -voms atlas")
    return -1

try:
    client = pyAMI.client.Client('atlas')
    AtlasAPI.init()
except Exception:
    logging.error("Could not establish pyAMI session. Are you sure you have a valid certificate? Do: voms-proxy-init -voms atlas")
    return -1

paramExplains = []

paramUnits = dict()

paramDefaults = {}

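# Ask AMI for the full list of physics parameter definitions: build a human-readable
# explanation string for each (used in the --fields help text), and record its units
# and default value (None when AMI declares no default).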
res = client.execute('ListPhysicsParameterDefs',format='dom_object')
for r in res.get_rows() :
    explainString = "%s: %s" % (r[u'PARAMNAME'],r[u'DESCRIPTION'])
    if r[u'UNITS']!=u'NULL':
        explainString += " (units: %s)" % r[u'UNITS']
        paramUnits[r[u'PARAMNAME']] = r[u'UNITS']
    if r[u'HASDEFAULT']==u'N' : paramDefaults[str(r[u'PARAMNAME'])] = None
    else:
        explainString += " (default value = %s)" % r[u'DEFAULTVALUE']
        if r[u'PARAMTYPE']==u'number': paramDefaults[str(r[u'PARAMNAME'])] = float(r[u'DEFAULTVALUE'])
        elif r[u'PARAMTYPE']==u'string': paramDefaults[str(r[u'PARAMNAME'])] = str(r[u'DEFAULTVALUE'])
    paramExplains += [explainString]

paramDefaults["crossSection_pb"] = None
paramUnits["crossSection_pb"] = "pb"
paramExplains += ["crossSection_pb: Same as crossSection except in pb units (units: pb)"]
cern_time = timezone('UCT')
current_time = datetime.datetime.fromtimestamp(time.time(),cern_time).strftime('%Y-%m-%d %H:%M:%S')

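# Build the command-line interface; the --fields help text embeds the parameter
# explanations gathered above, so -h lists everything that can be requested.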
from argparse import RawTextHelpFormatter
parser = argparse.ArgumentParser(description=__doc__,formatter_class=RawTextHelpFormatter)
parser.add_argument('--inDS',nargs='+',default=[""],help="List of datasets to retrieve parameters for")
parser.add_argument('--inDsTxt',default="",help="Alternative to --inDS, can specify the datasets from an input file")
parser.add_argument('--fields',nargs='+',help="List of parameters to extract. Available parameters are: \n\n %s\n\nYou can also include any from:\n %s\nYou can also do keyword_xxx to add a bool branch for keywords" % ("\n ".join(paramExplains),", ".join(list(fieldDefaults.keys())+list(extraFieldDefaults.keys()))),default=["dataset_number","crossSection","kFactor","genFiltEff"])
parser.add_argument('--timestamp',default=current_time,help="The timestamp to query parameters at, specified in Coordinated Universal Time (UTC). If left blank, the current time is used")
parser.add_argument('--physicsGroups',nargs='+',default=["PMG,MCGN"],help="Physics groups from which to retrieve parameters, listed in order of priority (highest first). Default value is 'PMG,MCGN' (i.e. try to use PMG values, falling back on MCGN values if unavailable). Allowed groups are:\n PMG (this is the PMG's group name), BPHY, COSM, DAPR, EGAM, EXOT, FTAG, HIGG, HION, IDET, IDTR, JETM, LARG, MCGN (this is the AMI default group name), MDET, MUON, PHYS, REPR, SIMU, STDM, SUSY, TAUP, TCAL, TDAQ, THLT, TOPQ, TRIG, UPGR, VALI")

parser.add_argument('--oldTimestamp',default="",help="If specified, a diff between the old and new timestamp is displayed instead, with an explanation of any changed parameters")

parser.add_argument('--explainFields',nargs='+',default=[],help="The fields you would like explained; explanations appear as comment lines after each row in the output")
parser.add_argument('--explainInfo',nargs='+',default=[],help="Properties of the parameter to show in the explanation. Choose from: explanation, insert_time, physicsGroup, createdby")
parser.add_argument('--outFile',default=sys.stdout,type=argparse.FileType('w'),help="Where to write the output. Leave blank to print to stdout")
parser.add_argument('--delim',default="",help="The delimiter character. Defaults to spaces, producing a nicely formatted table")
parser.add_argument('-v',action='store_true',help="Verbose output for debugging")

args = parser.parse_args()

if args.v: logging.getLogger().setLevel(logging.DEBUG)
else: logging.getLogger().setLevel(logging.INFO)
logging.debug(args.inDS)
logging.debug(args.fields)
logging.debug(args.timestamp)

if args.timestamp=="the dawn of time":
    logging.error("Unfortunately we don't know any parameters from this time period... but we're working on it!")
    return 9999

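# --fields, --physicsGroups, --explainFields and --explainInfo accept either
# space-separated or comma-separated lists; flatten and strip them here.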
args.fields = sum((y.split(',') for y in args.fields),[])
args.fields = [x.strip() for x in args.fields]

args.keywords=[]
for f in args.fields:
    if f.startswith("keyword_"):
        k = f[8:]
        extraFieldDefaults["keyword_%s"%k]=bool(False)
        args.keywords += [k]

args.physicsGroups = sum((y.split(',') for y in args.physicsGroups),[])
args.physicsGroups = [x.strip() for x in args.physicsGroups]

args.explainFields = sum((y.split(',') for y in args.explainFields),[])
args.explainFields = [x.strip() for x in args.explainFields]
args.explainInfo = sum((y.split(',') for y in args.explainInfo),[])
args.explainInfo = [x.strip() for x in args.explainInfo]

if args.inDsTxt != '': args.inDS = readDsFromFile(args.inDsTxt)

args.inDS = sum((y.split(',') for y in args.inDS),[])
args.inDS = [x.strip() for x in args.inDS]

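# Classify the requested fields: dataset-level fields come straight from the dataset
# listing, "extra" fields from the per-dataset info query, and the rest are physics
# parameters obtained from GetPhysicsParamsForDataset.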
dsFields = [ x for x in args.fields if x in fieldDefaults.keys() and x not in ["subprocessID","ldn"] ]
extraFields = [ x for x in args.fields if x in extraFieldDefaults.keys() ]
paramFields = [ x for x in args.fields if x in paramDefaults.keys() ]

if len(paramFields)>0 and args.physicsGroups==[""]:
    logging.error("You must specify at least one physics group. See -h for allowed groups")
    return -1

fieldDefaults.update(paramDefaults)
fieldDefaults.update(extraFieldDefaults)

for field in args.fields:
    if field not in fieldDefaults:
        logging.error("%s is not a recognised field. Allowed fields are:", field)
        logging.error(fieldDefaults.keys())
        return -1

if args.oldTimestamp!="":
    logging.info("oldTimestamp option specified. Running in diff mode...")
    args.explainFields = args.fields
    args.explainInfo = ["explanation","insert_time","physicsGroup","createdby"]

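# Convert shell-style '*' wildcards to the '%' wildcard used by the AMI queries and
# strip any trailing container '/' from the dataset names.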
args.inDS = [ds.replace("*","%") for ds in args.inDS]
args.inDS = [ds.rstrip("/") for ds in args.inDS]

if len(args.inDS)==0 or (len(args.inDS)==1 and args.inDS[0]==""):
    logging.error("No datasets provided. Please specify datasets with the --inDS or --inDsTxt options")
    return -1

logging.info("Fetching list of datasets from AMI (this may take a few minutes)...")

res = AtlasAPI.list_datasets(client,patterns=args.inDS,fields=dsFields+['ldn'],ami_status="VALID")

logging.info("...Found %d datasets matching your selection", len(res))

if len(res)==0:
    return 0

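# Collect the dataset-level field values, keyed by dataset name (ldn); keyword_* and
# "extra" fields need an additional per-dataset info query below.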
dataset_values = dict()
for r in res:
    mydict = dict()
    dataset_values[str(r['ldn'])] = mydict
    for field in r.items():
        if str(field[0]) == "ldn": continue
        if str(field[0]) not in args.fields: continue
        mydict[str(field[0])] = str(field[1])

    if len(extraFields)>0 or len(args.keywords)>0:
        info_res = AtlasAPI.get_dataset_info(client,str(r['ldn']))
        if len(info_res)==0:
            logging.error("Unable to retrieve dataset info for %s", r['ldn'])
            return -1
        for field in extraFields:
            if field.startswith("keyword_"): continue
            mydict[field] = float(info_res[0][str(field)]) if isfloat(info_res[0][str(field)]) else extraFieldDefaults[field]
        for k in args.keywords:
            mydict["keyword_%s" % k] = int( (k in str(info_res[0][str('keyword')]).split(",")) )

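# Re-order the results so datasets appear in the order they were requested;
# any remaining (e.g. wildcard-matched) datasets are appended afterwards.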
from collections import OrderedDict
sorted_values = OrderedDict()
for ds in args.inDS:
    if ds in dataset_values.keys():
        sorted_values[ds] = dataset_values[ds]

for ds in dataset_values.keys():
    if ds not in sorted_values.keys():
        sorted_values[ds] = dataset_values[ds]
dataset_values = sorted_values

logging.debug(dataset_values)

for ds in args.inDS:
    if '%' not in ds and ds not in dataset_values.keys():
        logging.warning("Unknown dataset: %s", ds)

datasetsToQuery = ",".join(dataset_values.keys())

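# If the datasets came from a text file, re-order the results to match the file and
# keep its comment and blank lines so they can be reproduced in the output.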
complete_values = OrderedDict()
if args.inDsTxt != "":
    commentcount=0
    import re
    txt = open(args.inDsTxt)
    for tmpLine in txt:
        tmpLine = re.sub('\n','',tmpLine)
        tmpLine = tmpLine.strip()
        if tmpLine.startswith('#') or tmpLine == '':
            complete_values['comment%d'%(commentcount)] = tmpLine
            commentcount = commentcount+1
            continue
        tmpLine = tmpLine.rstrip("/")
        if tmpLine in dataset_values.keys():
            complete_values[tmpLine] = dataset_values[tmpLine]
        else:
            print("cannot find %s" % tmpLine)
    txt.close()
    dataset_values = complete_values

logging.info("Obtaining %s for selected datasets at timestamp=%s... (please be patient)",
             args.fields, args.timestamp)

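# Fetch the physics parameters for all selected datasets in a single AMI call;
# the query adds --history=true when asking for values at a non-current timestamp.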
if args.timestamp==current_time:
    res = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% datasetsToQuery,"--timestamp='%s'"%args.timestamp], format='dom_object')
else:
    res = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% datasetsToQuery,"--timestamp='%s'"%args.timestamp,"--history=true"], format='dom_object')

parameterQueryResults = dict()
for r in res.get_rows():
    if r[u'logicalDatasetName'] not in parameterQueryResults.keys():
        parameterQueryResults[r[u'logicalDatasetName']] = []
    parameterQueryResults[r[u'logicalDatasetName']] += [r]

if args.oldTimestamp!="" :
    logging.info("Obtaining %s for selected datasets at timestamp=%s... (please be patient)",
                 args.fields,args.oldTimestamp)
    res2 = client.execute(['GetPhysicsParamsForDataset',"--logicalDatasetName=%s"% datasetsToQuery,"--timestamp='%s'"%args.oldTimestamp,"--history=true"], format='dom_object')
    old_parameterQueryResults = dict()
    for r in res2.get_rows():
        if r[u'logicalDatasetName'] not in old_parameterQueryResults.keys():
            old_parameterQueryResults[r[u'logicalDatasetName']] = []
        old_parameterQueryResults[r[u'logicalDatasetName']] += [r]

headerString = ""
doneHeader=False
commentCache = ""
commentCount = 0

outputTable = []
tableHeaders = []

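# Main loop: build one output row per dataset (and per subprocessID when a dataset
# has parameters defined for several subprocesses), buffering rows in outputTable.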
for ds in dataset_values.keys():
    if ds.startswith('comment'):
        if commentCount > 0 : commentCache += "\n"
        commentCache += dataset_values[ds]
        commentCount=commentCount+1
        continue

    res = parameterQueryResults.get(ds,[])
    if args.oldTimestamp!="": res2 = old_parameterQueryResults.get(ds,[])

    dsSubprocesses = [0]
    for r in res:
        sp = int(r[u'subprocessID'])
        if sp not in dsSubprocesses: dsSubprocesses += [sp]

    for sp in dsSubprocesses:
        paramVals = dict()
        paramVals2 = dict()
        groupsWithVals = dict()

        explainInfo = dict()
        for i in args.explainFields: explainInfo[i] = dict()

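        # For each requested physics parameter, keep the value from the highest-priority
        # physics group listed in --physicsGroups; values from groups outside that list
        # are remembered in groupsWithVals so they can be reported if nothing usable is found.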
        for param in paramFields:
            groupsWithVals[param] = []
            bestGroupIndex = len(args.physicsGroups)
            import copy
            paramVals[param] = copy.copy(fieldDefaults[param])
            for r in res:
                if int(r[u'subprocessID']) != sp: continue
                if str(r[u'paramName']) != param and not (param=="crossSection_pb" and str(r[u'paramName'])=="crossSection"): continue
                if str(r[u'physicsGroup']) not in args.physicsGroups:
                    groupsWithVals[param] += [(str(r[u'physicsGroup']),str(r[u'paramValue']))]
                    continue
                if args.physicsGroups.index(str(r[u'physicsGroup'])) > bestGroupIndex : continue
                if args.physicsGroups.index(str(r[u'physicsGroup'])) == bestGroupIndex : logging.warning("Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!", param, r[u'physicsGroup'], ds, sp)
                paramVals[param] = str(r[u'paramValue'])
                if param=="crossSection_pb": paramVals[param] = str(float(paramVals[param])*1000.0)
                bestGroupIndex=args.physicsGroups.index(str(r[u'physicsGroup']))

                for e in args.explainInfo:
                    if str(e) not in r:
                        logging.error("Unrecognised explainInfo field: %s", e)
                        return -1
                    explainInfo[param][e]=str(r[str(e)])
            if args.oldTimestamp!="":
                bestGroupIndex = len(args.physicsGroups)
                paramVals2[param] = copy.copy(fieldDefaults[param])
                for r in res2:
                    if int(r[u'subprocessID']) != sp: continue
                    if str(r[u'paramName']) != param and not (param=="crossSection_pb" and str(r[u'paramName'])=="crossSection"): continue
                    if str(r[u'physicsGroup']) not in args.physicsGroups: continue
                    if args.physicsGroups.index(str(r[u'physicsGroup'])) > bestGroupIndex : continue
                    if args.physicsGroups.index(str(r[u'physicsGroup'])) == bestGroupIndex : logging.warning("Duplicate parameter %s for group %s in dataset %s (subprocess %d). Please report this!", param, r[u'physicsGroup'], ds, sp)
                    paramVals2[param] = str(r[u'paramValue'])
                    if param=="crossSection_pb": paramVals2[param] = str(float(paramVals2[param])*1000.0)
                    bestGroupIndex=args.physicsGroups.index(str(r[u'physicsGroup']))


        rowString = ""
        rowList = []
        firstPrint=False
        for param in args.fields:
            val = None
            if param == "ldn": val = ds
            elif param == "subprocessID": val = sp
            elif param in dataset_values[ds].keys(): val = dataset_values[ds][param]
            else: val = paramVals.get(param,None)
            if val is None:
                if args.outFile != sys.stdout: logging.warning("dataset %s (subprocess %d) does not have parameter %s, which has no default.",ds,sp,param)
                if len(groupsWithVals.get(param,[]))>0:
                    logging.warning("The following physicsGroups have defined that parameter though:")
                    logging.warning(groupsWithVals[param])
                val = "#UNKNOWN#"

            if args.oldTimestamp!="":
                val2 = None
                if param == "ldn": val2 = ds
                elif param == "subprocessID": val2 = sp
                elif param in dataset_values[ds].keys(): val2 = dataset_values[ds][param]
                else: val2 = paramVals2.get(param,None)
                if val2 is None: val2 = "#UNKNOWN#"

                if str(val)!=str(val2):
                    if not firstPrint: print("%s:" % ds)
                    firstPrint=True
                    print("  %s : %s ---> %s" % (param,str(val2),str(val)))
                    print("      insert_time  : %s" % explainInfo[param]['insert_time'])
                    print("      explanation  : %s" % explainInfo[param]['explanation'])
                    print("      createdby    : %s" % explainInfo[param]['createdby'])
                    print("      physicsGroup : %s" % explainInfo[param]['physicsGroup'])
                continue

            rowList += [str(val)]
            if rowString != "" and args.delim!="": rowString += args.delim
            rowString += str(val)

            if not doneHeader:
                headerString += param
                if args.outFile != sys.stdout:
                    if type(fieldDefaults[param])==bool: headerString += "/O:"
                    elif type(fieldDefaults[param])==int: headerString += "/I:"
                    elif type(fieldDefaults[param])==float: headerString += "/D:"
                    elif isfloat(str(val)): headerString += "/D:"
                    else: headerString += "/C:"
                else:
                    v = param
                    if param in paramUnits:
                        headerString += " [%s]" % paramUnits[param]
                        v += " [%s]" % paramUnits[param]
                    tableHeaders += [v]
                    headerString += " "

        if args.oldTimestamp!="": continue

        if not doneHeader:
            doneHeader=True
            if args.outFile!=sys.stdout: print(headerString[:-1],file=args.outFile)
        if commentCount > 0:
            if args.outFile!=sys.stdout and args.delim!="": print(commentCache,file=args.outFile)
            outputTable += [["COMMENT",commentCache]]
            commentCache = ''
            commentCount = 0
        if args.outFile != sys.stdout and args.delim!="": print(rowString,file=args.outFile)
        outputTable += [rowList]

        for (field,expl) in explainInfo.items():
            outString = "#%s: { " % field
            doneFirst=False
            for eField in args.explainInfo:
                if doneFirst: outString += " , "
                if eField not in expl.keys(): outString += " %s: <NONE .. value is default>"%eField
                else: outString += "%s: %s" % (eField,expl[eField])
                doneFirst=True
            outString += " }"
            print(outString,file=args.outFile)

if args.oldTimestamp!="":
    args.outFile.close()
    return 0


if args.outFile == sys.stdout or args.delim=="":
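    # Pretty-print mode: make each column wide enough for its header and its longest
    # value, then left-justify every cell to that width.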
    columnWidths = [0]*len(args.fields)
    for i in range(0,len(tableHeaders)):
        columnWidths[i] = len(tableHeaders[i])
    for r in outputTable:
        if len(r)>0 and r[0]=="COMMENT": continue
        for i in range(0,len(r)):
            if len(r[i])>columnWidths[i]: columnWidths[i]=len(r[i])
    lineout = ""
    for i in range(0,len(tableHeaders)):
        lineout += tableHeaders[i].ljust(columnWidths[i]) + " "
    print(lineout,file=args.outFile)
    for r in outputTable:
        lineout = ""
        if len(r)>0 and r[0]=="COMMENT": lineout = r[1]
        else:
            for i in range(0,len(r)):
                lineout += r[i].ljust(columnWidths[i]) + " "
        print(lineout,file=args.outFile)

import os
if args.outFile != sys.stdout:
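    # Append a footer recording the release and the equivalent command line, so the
    # same query can be repeated later.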
    datasetss = [x for x in dataset_values.keys() if not x.startswith("comment")]

    print("",file=args.outFile)
    print("#lsetup \"asetup %s,%s\" pyAMI" % (os.environ.get('AtlasProject','UNKNOWN!'),os.environ.get('AtlasVersion','UNKNOWN!')),file=args.outFile)
    print("#getMetadata.py --timestamp=\"%s\" --physicsGroups=\"%s\" --fields=\"%s\" --inDS=\"%s\"" % (args.timestamp,",".join(args.physicsGroups),",".join(args.fields),",".join(datasetss)),file=args.outFile)
    logging.info("Results written to: %s", args.outFile.name)

args.outFile.close()