ATLAS Offline Software
getFileLists.py
Go to the documentation of this file.
1 # Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
2 
3 import os
4 import sys
5 from xml.dom.minidom import parse
6 from time import time
7 import re
8 import json
9 import six
10 
def tctPath(build,rel):
    """Return the RTT results directory of the Tier0ChainTests for a build.

    Parameters:
        build: release/build name, e.g. "15.6.X.Y" or "17.2.X".
        rel:   nightly release, with or without the "rel_" prefix
               ("1" and "rel_1" are equivalent).

    Returns:
        The directory path as a string, ending in "/".

    Raises:
        KeyError: if the architecture has to be taken from the CMTCONFIG
                  environment variable and that variable is not set.
    """
    if not rel.startswith("rel_"):
        rel = "rel_" + rel

    # 17.x and 18.x builds always use a fixed architecture; only consult the
    # environment when it is actually needed, so that these builds can be
    # looked up without CMTCONFIG being set.
    if build.startswith(("17.", "18.")):
        arch = "i686-slc5-gcc43-opt"
    else:
        arch = os.environ['CMTCONFIG']

    return "/afs/cern.ch/atlas/project/RTT/prod/Results/rtt/"+rel+"/"+build+"/build/"+arch+"/Athena/Tier0ChainTests/"
18 
class TCTChainInfo:
    """Book-keeping record for one TCT job directory and its log file.

    Attributes (all set in __init__):
        directory:  directory the job ran in.
        logfile:    path of the job's log file.
        eventlist:  tuple of (run, event) pairs, extended by addRunEvent().
        loglines:   counter, starts at 0.
        cpulist:    empty tuple; not filled by this class.
        memlist:    empty tuple; not filled by this class.
        outputDict: empty dict; not filled by this class.
    """

    def __init__(self,dir,log,runEventIn=()):
        """Store the job directory, its log file and an optional
        initial tuple of (run, event) pairs."""
        self.directory = dir
        self.logfile = log
        self.eventlist = runEventIn
        self.loglines = 0
        self.cpulist = ()
        self.memlist = ()
        self.outputDict = {}

    def addRunEvent(self,run,event):
        """Append the pair (run, event) to eventlist."""
        # eventlist is a tuple, so += rebinds the attribute to a new tuple
        # instead of mutating the (shared) default argument.
        self.eventlist += ((run, event),)
31 
class findTCTFiles:
    """Find pairs of comparable output files of two TCT runs.

    One run is the reference (rDir), the other the validation run (vDir).
    Job directories are recognised by the presence of a "*_log" file; two
    directories are paired when their log files have the same name.
    """

    # Chains whose name starts/ends like this are never compared.
    _SKIP_PREFIXES = ("LatestRun",)
    _SKIP_SUFFIXES = ("_MP", "IDCosmic0", "_PHYSVAL", "Derived_Outputs")

    # File types used to pair output files whose names differ between the
    # two runs -- or anything matching them, though more than one will
    # cause problems! /CO
    _FORMATS = ("RDO", "ESD", "AOD", "TAG")

    def __init__(self, rDir, vDir, checkAge = False):
        """Remember the reference (rDir) and validation (vDir) directories.

        checkAge is only stored; the code in this class does not read it.
        """
        self._checkAge = checkAge
        self._rDir = rDir
        self._vDir = vDir
        # chain name -> tuple of TCTChainInfo: (reference,) or
        # (reference, validation)
        self._commonDirs = dict()
        self._vFiles = []
        self._rFiles = []

    def checkFileAge(self,path):
        """Print a warning if *path* was modified more than 24 hours ago.

        Files that cannot be stat'ed are silently ignored (best effort).
        """
        try:
            fileTime = os.stat(path)[8]  # element 8 of the stat tuple: mtime
        except Exception:
            return
        age = time() - fileTime

        if age > 86400: #More than 24h
            print ("WARNING! File %s is more than %d day(s) old" % (path, int(age/86400.)))
        return

    def hasLogfile(self,ref,p,fl):
        """Register every job log file found among the entries of a directory.

        Parameters:
            ref: True while scanning the reference run, False for the
                 validation run.
            p:   directory being visited.
            fl:  names of the entries of that directory.

        A log file is a regular file ending in "_log" but not in
        "_script_log"; the chain name is the file name without "_log".
        Validation chains are only kept if the same name was already
        registered by the reference scan.
        """
        for f in fl:
            logpath = p + '/' + f
            isJobLog = f.endswith("_log") and not f.endswith("_script_log")
            if not (os.path.isfile(logpath) and isJobLog):
                continue

            name = f[:-4]
            tci = TCTChainInfo(p, logpath)
            if ref: #Reference TCT (the first one)
                if name in self._commonDirs:
                    print ("ERROR: Duplicate directory name",name)
                else:
                    self._commonDirs[name] = (tci,)
            elif name in self._commonDirs: #exists also in reference set
                self._commonDirs[name] += (tci,)
        return

    def addNew(self,fileList,file):
        """Append *file* to *fileList* unless an equivalent entry exists.

        Two "/"-separated paths are equivalent when both their last component
        (file name) and their third-to-last component (called "stream" here)
        agree. Paths with fewer than three components have no stream and are
        compared by file name only.
        """
        def key(path):
            parts = path.split("/")
            stream = parts[-3] if len(parts) >= 3 else None
            return (stream, parts[-1])

        newKey = key(file)
        if any(key(old) == newKey for old in fileList):
            return
        fileList += [file,]  # in-place extension: the caller's list grows
        return

    def findBetween(self, s, first, last ):
        """Return the part of *s* between the first occurrence of *first*
        and the next occurrence of *last*; "" if either is missing."""
        try:
            start = s.index( first ) + len( first )
            end = s.index( last, start )
            return s[start:end]
        except ValueError:
            return ""

    def getTCTChainInfo(self,tci):
        """Parse the log file of *tci* for the event counts of its outputs.

        Once a line containing "INFO Validating output files" has been seen,
        every "... Testing event count..." line names an output file and the
        line following it is expected to carry the count as
        "Event counting test passed (N events).".

        Fills tci.outputDict (file name -> event count, -1 if the count could
        not be read) and adds the number of lines read to tci.loglines.

        Returns:
            tci.outputDict, or None if the log file cannot be opened.
        """
        validationStartLine = "INFO Validating output files"

        try:
            lf = open(tci.logfile,"r")
        except Exception:
            return None

        foundFileValidationStart = False
        nextLineHasEventCount = False
        lastOutputFileName = ""
        with lf:  # make sure the log is closed even if parsing fails
            for l in lf:
                tci.loglines += 1
                if not foundFileValidationStart:
                    if validationStartLine in l:
                        foundFileValidationStart = True
                elif "Testing event count..." in l:
                    outName = self.findBetween(l, "INFO", "Testing event count...").strip()
                    tci.outputDict[outName] = -1
                    nextLineHasEventCount = True
                    lastOutputFileName = outName
                elif nextLineHasEventCount:
                    count = self.findBetween(l, "Event counting test passed (", " events).")
                    try:
                        tci.outputDict[lastOutputFileName] = int(count)
                    except ValueError:
                        # The line does not carry a count (e.g. the test
                        # failed): keep the -1 placeholder instead of crashing.
                        pass
                    nextLineHasEventCount = False
        return tci.outputDict

    def _walk(self, top, ref):
        """Call hasLogfile() for every directory in the tree rooted at *top*.

        Replacement for os.path.walk(), which no longer exists in Python 3.
        """
        for dirpath, dirnames, filenames in os.walk(top):
            self.hasLogfile(ref, dirpath, dirnames + filenames)

    def getChains(self):
        """Scan the reference directory only and return the chains found
        (chain name -> 1-tuple of TCTChainInfo)."""
        print ("Input directory:",self._rDir)
        print ("Searching for TCT sub-directories")
        self._walk(self._rDir, True)
        print ("Found ",len(self._commonDirs),"directories")
        return self._commonDirs

    def _matchByType(self, refEvents, valEvents):
        """Return (type, refName, valName) for the first reference/validation
        output pair whose names contain the same entry of _FORMATS, or None."""
        for refFormat in refEvents:
            for vFormat in valEvents:
                for f in self._FORMATS:
                    if f in refFormat and f in vFormat:
                        return f, refFormat, vFormat
        return None

    def getCommonChains(self):
        """Scan both runs and keep only the chains that can be compared.

        A chain is dropped when its name is on the skip list, when it is not
        present exactly once in both runs, when no output files are found in
        either log, or when the outputs differ and no pair of outputs of the
        same type can be found.

        Returns:
            dict chain name -> (reference TCTChainInfo, validation TCTChainInfo)
        """
        print ("Searching for compatible TCT directories ..." )
        allEvents = 0

        self._walk(self._rDir, True)   # Reference directory
        self._walk(self._vDir, False)  # Validation directory

        # Iterate over a snapshot of the names: entries are removed from the
        # dictionary inside the loop.
        for tctname in list(self._commonDirs):
            if tctname.startswith(self._SKIP_PREFIXES) or tctname.endswith(self._SKIP_SUFFIXES):
                print ("skipping "+tctname)
                self._commonDirs.pop(tctname)
                continue

            tcis = self._commonDirs[tctname]
            if len(tcis) != 2:
                self._commonDirs.pop(tctname)
                continue

            ref, val = tcis

            refEvents = self.getTCTChainInfo(ref)
            if not refEvents:
                print ("No events found in",ref.logfile)
                self._commonDirs.pop(tctname)
                continue

            valEvents = self.getTCTChainInfo(val)
            if not valEvents:
                print ("No events found in", val.logfile)
                # Nothing to compare against: drop it, as for the reference.
                self._commonDirs.pop(tctname)
                continue

            if valEvents == refEvents:
                allEvents += sum(refEvents.values())
                print ("TCT %s output seems compatible for ref and chk:" % (tctname))
                for fmt in refEvents:
                    print ("%-70s: ref: %d events, val: %d events" % (fmt, refEvents[fmt], valEvents[fmt]))
            else:
                # workaround for when names of test output files change between two rels
                print ("The (names of the) output files differ in some way:")
                print (refEvents)
                print (valEvents)
                print ("Will now attempt to match the files by type")
                match = self._matchByType(refEvents, valEvents)
                if match is None:
                    # don't compare the files for this then!
                    self._commonDirs.pop(tctname)
                    print ("TCT %s is NOT compatible, outputs different number of events for at least one format:" % tctname)
                else:
                    f, refFormat, valFormat = match
                    print ("Both are %s: %s, %s" % (f, refFormat, valFormat))
                    print (" %s, ref: %d, val: %d" % (f, refEvents[refFormat], valEvents[valFormat]))
                print ()

        print ("Found %i compatible TCT chains with at total of %i events" % (len(self._commonDirs), allEvents))
        return self._commonDirs

    def findFilesInDir(self,dir,filename):
        """Return the files of one job directory that match a pattern.

        Parameters:
            dir:      directory to list.
            filename: regular expression (string or compiled pattern) that is
                      searched for in each entry name.

        If the directory contains "rttjobinfo.xml", the matching paths listed
        in it are added as well. Duplicates are filtered with addNew().
        """
        pattern = re.compile(filename)
        result = []
        ls = os.listdir(dir)
        for f in ls:
            if pattern.search(f):
                self.addNew(result, dir + "/" + f)
        if "rttjobinfo.xml" in ls:
            for f in self.extractCastorPath(dir + "/rttjobinfo.xml", pattern):
                self.addNew(result, f)
        return result

    def findFiles(self,pattern):
        """Pair up matching files of the reference and validation chains.

        Runs getCommonChains() first if no chains are known yet.

        Returns:
            dict chain name -> tuple of (reference file, validation file);
            two files are paired when their file names are identical.
        """
        result = dict()
        if len(self._commonDirs) == 0:
            self.getCommonChains()

        # dict.items() works on Python 2 and 3 alike.
        for name, (ref, val) in self._commonDirs.items():
            reffiles = self.findFilesInDir(ref.directory, pattern)
            valfiles = self.findFilesInDir(val.directory, pattern)

            # First validation file seen for every file name.
            valByName = {}
            for vf in valfiles:
                valByName.setdefault(vf.split("/")[-1], vf)

            pairs = ()
            for rf in reffiles:
                rfN = rf.split("/")[-1]
                if rfN in valByName:
                    pairs += ((rf, valByName[rfN]),)
            result[name] = pairs
        return result

    def extractCastorPath(self,rttxmlfile,pattern):
        """Return the archived file paths in an RTT job-info XML file that
        match *pattern* (a compiled regular expression).

        Each <archivefile> element either has a <destination> child holding
        the path, or contains a dictionary written with single quotes whose
        'src' entry is the path.
        """
        res = []
        dom = parse(rttxmlfile)
        for af in dom.getElementsByTagName("archivefile"):
            cpEle = af.getElementsByTagName("destination")

            if not cpEle:
                s = af.firstChild.nodeValue
                # The text is a Python-style dict literal; turn the single
                # quotes into double quotes so that it parses as JSON.
                json_acceptable_string = s.replace("'", "\"")
                d = json.loads(json_acceptable_string)
                castorpath = d['src']
            else:
                castorpath = cpEle[0].childNodes[0].data.strip()
            if pattern.search(castorpath):
                res += [castorpath,]
        del dom
        return res
266 
267 
268 
if __name__=="__main__":
    # Command-line entry point: print the matching reference/validation file
    # pairs of two TCT runs.
    def usage():
        """Print a short usage message."""
        print ("Find compatible pairs of TCT files")
        print ("Usage example")
        print (sys.argv[0],"15.6.X.Y rel_1 15.6.X.Y-VAL rel_2 'myTAGCOMM.*root'")

    # Expected arguments: <ref build> <ref rel> <val build> <val rel> <regex>
    if len(sys.argv) != 6:
        usage()
        sys.exit(-1)

    rPath=tctPath(sys.argv[1],sys.argv[2])
    vPath=tctPath(sys.argv[3],sys.argv[4])

    if not os.access(rPath,os.R_OK):
        print ("Can't access output of reference TCT at",rPath)
        sys.exit(-1)

    if not os.access(vPath,os.R_OK):
        # This is the validation run; the message used to say "reference".
        print ("Can't access output of validation TCT at",vPath)
        sys.exit(-1)

    pattern=re.compile(sys.argv[5])
    ff=findTCTFiles(rPath,vPath)

    # Scan both runs up front; findFiles() re-uses the chains found here.
    ff.getCommonChains()

    res=ff.findFiles(pattern)
    for name,fl in six.iteritems (res):
        print (name)
        for (r,v) in fl:
            print ("Ref:",r)
            print ("Val:",v)
301 
302 
303 
python.getFileLists.findTCTFiles._vDir
_vDir
Definition: getFileLists.py:36
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
python.getFileLists.TCTChainInfo.cpulist
cpulist
Definition: getFileLists.py:25
python.getFileLists.findTCTFiles.findFilesInDir
def findFilesInDir(self, dir, filename)
Definition: getFileLists.py:207
parse
std::map< std::string, std::string > parse(const std::string &list)
Definition: egammaLayerRecalibTool.cxx:1054
python.getFileLists.findTCTFiles.checkFileAge
def checkFileAge(self, path)
Definition: getFileLists.py:41
python.getFileLists.findTCTFiles.addNew
def addNew(self, fileList, file)
Definition: getFileLists.py:73
python.getFileLists.TCTChainInfo.__init__
def __init__(self, dir, log, runEventIn=())
Definition: getFileLists.py:20
python.getFileLists.usage
def usage()
Definition: getFileLists.py:270
python.getFileLists.findTCTFiles._checkAge
_checkAge
Definition: getFileLists.py:34
python.getFileLists.findTCTFiles._vFiles
_vFiles
Definition: getFileLists.py:38
python.getFileLists.findTCTFiles.findBetween
def findBetween(self, s, first, last)
Definition: getFileLists.py:85
python.getFileLists.findTCTFiles.hasLogfile
def hasLogfile(self, ref, p, fl)
Definition: getFileLists.py:53
python.getFileLists.findTCTFiles.__init__
def __init__(self, rDir, vDir, checkAge=False)
Definition: getFileLists.py:33
python.getFileLists.TCTChainInfo.logfile
logfile
Definition: getFileLists.py:22
python.getFileLists.findTCTFiles.getChains
def getChains(self)
Definition: getFileLists.py:124
python.getFileLists.TCTChainInfo.outputDict
outputDict
Definition: getFileLists.py:27
python.getFileLists.findTCTFiles.getTCTChainInfo
def getTCTChainInfo(self, tci)
Definition: getFileLists.py:93
python.getFileLists.findTCTFiles._rFiles
_rFiles
Definition: getFileLists.py:39
python.getFileLists.findTCTFiles
Definition: getFileLists.py:32
python.getFileLists.TCTChainInfo.eventlist
eventlist
Definition: getFileLists.py:23
python.getFileLists.findTCTFiles.findFiles
def findFiles(self, pattern)
Definition: getFileLists.py:222
python.getFileLists.TCTChainInfo.directory
directory
Definition: getFileLists.py:21
python.getFileLists.findTCTFiles._rDir
_rDir
Definition: getFileLists.py:35
python.getFileLists.tctPath
def tctPath(build, rel)
Definition: getFileLists.py:11
python.getFileLists.TCTChainInfo.memlist
memlist
Definition: getFileLists.py:26
python.getFileLists.TCTChainInfo
Definition: getFileLists.py:19
python.getFileLists.findTCTFiles.extractCastorPath
def extractCastorPath(self, rttxmlfile, pattern)
Definition: getFileLists.py:247
Trk::open
@ open
Definition: BinningType.h:40
python.getFileLists.findTCTFiles.getCommonChains
def getCommonChains(self)
Definition: getFileLists.py:132
python.getFileLists.TCTChainInfo.addRunEvent
def addRunEvent(self, run, event)
Definition: getFileLists.py:29
python.Bindings.keys
keys
Definition: Control/AthenaPython/python/Bindings.py:798
python.getFileLists.findTCTFiles._commonDirs
_commonDirs
Definition: getFileLists.py:37
python.getFileLists.TCTChainInfo.loglines
loglines
Definition: getFileLists.py:24