ATLAS Offline Software
Loading...
Searching...
No Matches
getFileLists.py
Go to the documentation of this file.
1# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
2
3import os
4import sys
5from xml.dom.minidom import parse
6from time import time
7import re
8import json
9import six
10
def tctPath(build, rel):
    """Return the AFS directory holding the Tier0ChainTests RTT results.

    Parameters:
        build: Build/release name, e.g. "15.6.X.Y".  Builds starting with
            "17." or "18." always use the fixed "i686-slc5-gcc43-opt"
            platform.
        rel: Nightly tag; "rel_" is prepended when it is missing.

    Returns:
        The results path as a string, with a trailing slash.

    Raises:
        KeyError: if the platform has to be taken from the environment
            and CMTCONFIG is not set.
    """
    if not rel.startswith("rel_"):
        rel = "rel_" + rel

    if build.startswith(("17.", "18.")):
        arch = "i686-slc5-gcc43-opt"
    else:
        # Only consult the environment when the value is actually used, so
        # that 17.x/18.x look-ups work without CMTCONFIG being defined.
        arch = os.environ['CMTCONFIG']

    return ("/afs/cern.ch/atlas/project/RTT/prod/Results/rtt/" + rel + "/"
            + build + "/build/" + arch + "/Athena/Tier0ChainTests/")
18
class TCTChainInfo:
    """Book-keeping record for one TCT job directory and its log file.

    Attributes:
        directory: Directory the job ran in.
        logfile: Path of the job's log file.
        eventlist: Tuple of (run, event) pairs, extended by addRunEvent().
        loglines: Counter of log lines, starts at 0.
        cpulist, memlist: Empty tuples; not filled by this class.
        outputDict: Dictionary of per-output-file information, starts empty.
    """

    def __init__(self, dir, log, runEventIn=()):
        self.directory = dir
        self.logfile = log
        self.eventlist = runEventIn
        self.loglines = 0
        self.cpulist = ()
        self.memlist = ()
        self.outputDict = {}

    def addRunEvent(self, run, event):
        """Append the pair (run, event) to eventlist."""
        # Tuples are immutable, so this rebinds eventlist to a new tuple and
        # never modifies the (shared) default argument of __init__.
        self.eventlist += ((run, event),)
31
class findTCTFiles:
    """Find matching output files of a reference and a validation TCT run.

    Both directory trees are scanned for job log files ("*_log", excluding
    "*_script_log").  Jobs present in both trees are kept when the event
    counts reported in their logs agree, or when at least one pair of
    output files can be matched by type.  findFiles() then pairs up the
    files of the surviving jobs by file name.

    Note: hasLogfile() instantiates TCTChainInfo, which must be defined in
    the same module.
    """

    # Substrings used to match output files by type when their names differ.
    # Anything matching more than one of them will cause problems.
    _FORMATS = ("RDO", "ESD", "AOD", "TAG")

    # Chains whose name ends with one of these are never compared.
    _SKIPPED_SUFFIXES = ("_MP", "IDCosmic0", "_PHYSVAL", "Derived_Outputs")

    def __init__(self, rDir, vDir, checkAge=False):
        """Store the reference (rDir) and validation (vDir) directories."""
        self._checkAge = checkAge
        self._rDir = rDir
        self._vDir = vDir
        # chain name -> (reference info,) or (reference info, validation info)
        self._commonDirs = dict()
        self._vFiles = []
        self._rFiles = []

    def checkFileAge(self, path):
        """Print a warning if path was modified more than 24h ago.

        Files that cannot be stat'ed are silently ignored.
        """
        try:
            fileTime = os.stat(path).st_mtime
        except OSError:
            return
        age = time() - fileTime

        if age > 86400:  # More than 24h
            print ("WARNING! File %s is more than %d day(s) old" % (path, int(age/86400.)))
        return

    def hasLogfile(self, ref, p, fl):
        """Register every job log file among the entries fl of directory p.

        Parameters:
            ref: True while scanning the reference tree, False for the
                validation tree.
            p: Directory being visited.
            fl: Names of the entries of p.

        The chain name is the log file name without its "_log" suffix.
        Reference chains create a new entry; validation chains are only
        appended to entries that already exist in the reference set.
        """
        for f in fl:
            if not f.endswith("_log") or f.endswith("_script_log"):
                continue
            logpath = p + '/' + f
            if not os.path.isfile(logpath):
                continue

            name = f[:-4]
            tci = TCTChainInfo(p, logpath)
            if ref:  # Reference TCT (the first one)
                if name in self._commonDirs:
                    print ("ERROR: Duplicate directory name",name)
                else:
                    self._commonDirs[name] = (tci,)
            elif name in self._commonDirs:  # validation, exists also in reference set
                self._commonDirs[name] += (tci,)
        return

    def _walkLogfiles(self, topDir, ref):
        """Call hasLogfile() for every directory below (and including) topDir."""
        # os.path.walk() was removed in Python 3; os.walk() visits the same
        # directories top-down and also ignores unreadable ones.
        for dirPath, dirNames, fileNames in os.walk(topDir):
            self.hasLogfile(ref, dirPath, dirNames + fileNames)

    @staticmethod
    def _fileKey(path):
        """Return (stream, file name): third-last and last component of path."""
        parts = path.split("/")
        stream = parts[-3] if len(parts) >= 3 else ""
        return (stream, parts[-1])

    def addNew(self, fileList, file):
        """Append file to fileList (in place) unless an equivalent is present.

        Two paths are equivalent when both their last component (file name)
        and their third-last component (stream) are equal.  Paths with fewer
        than three components are treated as having an empty stream.
        """
        newKey = self._fileKey(file)
        for old in fileList:
            if self._fileKey(old) == newKey:
                return
        fileList.append(file)
        return

    def findBetween(self, s, first, last):
        """Return the part of s between first and the next last, or ""."""
        try:
            start = s.index(first) + len(first)
            end = s.index(last, start)
            return s[start:end]
        except ValueError:
            return ""

    def getTCTChainInfo(self, tci):
        """Extract the validated output files and event counts from a log.

        Lines before "INFO Validating output files" are ignored.  After it,
        each "... INFO <file> Testing event count..." line registers <file>
        and the line following it is expected to read
        "Event counting test passed (<N> events).".

        Parameters:
            tci: Object with logfile, loglines and outputDict attributes;
                loglines is incremented per line read and outputDict is
                filled in place.

        Returns:
            tci.outputDict mapping file name to event count (-1 if the count
            could not be read), or None if the log file cannot be opened.
        """
        validationStartLine = "INFO Validating output files"
        countMarker = "Testing event count..."

        try:
            lf = open(tci.logfile, "r")
        except (IOError, OSError):
            return None

        foundFileValidationStart = False
        pendingFile = None  # output file whose count is expected on the next line
        with lf:
            for line in lf:
                tci.loglines += 1
                if not foundFileValidationStart:
                    if validationStartLine in line:
                        foundFileValidationStart = True
                elif countMarker in line:
                    pendingFile = self.findBetween(line, "INFO", countMarker).strip()
                    tci.outputDict[pendingFile] = -1
                elif pendingFile is not None:
                    count = self.findBetween(line, "Event counting test passed (", " events).")
                    try:
                        tci.outputDict[pendingFile] = int(count)
                    except ValueError:
                        # Test did not pass or line is malformed: keep -1.
                        pass
                    pendingFile = None
        return tci.outputDict

    def getChains(self):
        """Scan the reference directory only and return the chains found."""
        print ("Input directory:",self._rDir)
        print ("Searching for TCT sub-directories")
        self._walkLogfiles(self._rDir, True)
        print ("Found ",len(self._commonDirs),"directories")
        return self._commonDirs

    def _matchByType(self, refEvents, valEvents):
        """Return (type, refFile, valFile) for the first pair of output files
        whose names share one of the _FORMATS substrings, or None."""
        for refFormat in refEvents:
            for valFormat in valEvents:
                for fmt in self._FORMATS:
                    if fmt in refFormat and fmt in valFormat:
                        return (fmt, refFormat, valFormat)
        return None

    def getCommonChains(self):
        """Scan both trees and keep only the chains that can be compared.

        A chain is dropped when it is on the skip list, is not present
        exactly once in each tree, has no event information in either log,
        or has differing outputs that cannot even be matched by type.

        Returns:
            Dictionary mapping chain name to (reference info, validation info).
        """
        print ("Searching for compatible TCT directories ..." )
        allEvents = 0

        self._walkLogfiles(self._rDir, True)    # Reference directory
        self._walkLogfiles(self._vDir, False)   # Validation directory

        # Iterate over a snapshot of the names: entries are removed while
        # looping, which is not allowed on a live dictionary view.
        for tctname in list(self._commonDirs):
            if tctname.startswith("LatestRun") or tctname.endswith(self._SKIPPED_SUFFIXES):
                print ("skipping "+tctname)
                self._commonDirs.pop(tctname)
                continue

            tcis = self._commonDirs[tctname]
            if len(tcis) != 2:
                self._commonDirs.pop(tctname)
                continue
            ref, val = tcis

            refEvents = self.getTCTChainInfo(ref)
            if not refEvents:
                print ("No events found in",ref.logfile)
                self._commonDirs.pop(tctname)
                continue

            valEvents = self.getTCTChainInfo(val)
            if not valEvents:
                print ("No events found in", val.logfile)
                # Nothing to compare against: drop it like the reference case.
                self._commonDirs.pop(tctname)
                continue

            if valEvents == refEvents:
                # -1 marks an unknown count and must not reduce the total.
                allEvents += sum(n for n in refEvents.values() if n > 0)
                print ("TCT %s output seems compatible for ref and chk:" % (tctname))
                for fmt in refEvents:
                    print ("%-70s: ref: %d events, val: %d events" % (fmt, refEvents[fmt], valEvents[fmt]))
                continue

            # workaround for when names of test output files change between two rels
            print ("The (names of the) output files differ in some way:")
            print (refEvents)
            print (valEvents)
            print ("Will now attempt to match the files by type")
            match = self._matchByType(refEvents, valEvents)
            if match is None:
                # don't compare the files for this then!
                self._commonDirs.pop(tctname)
                print ("TCT %s is NOT compatible, outputs different number of events for at least one format:" % tctname)
                print ()
            else:
                fmt, refFormat, valFormat = match
                print ("Both are %s: %s, %s" % (fmt, refFormat, valFormat))
                print (" %s, ref: %d, val: %d" % (fmt, refEvents[refFormat], valEvents[valFormat]))

        print ("Found %i compatible TCT chains with at total of %i events" % (len(self._commonDirs), allEvents))
        return self._commonDirs

    def findFilesInDir(self, dir, filename):
        """Return the files of dir whose name matches the regex filename.

        filename may be a pattern string or a compiled pattern.  If dir
        contains "rttjobinfo.xml", matching archive paths listed in it are
        added as well.  Duplicates (see addNew) are suppressed.
        """
        pattern = re.compile(filename)
        result = []
        entries = os.listdir(dir)
        for entry in entries:
            if pattern.search(entry):
                self.addNew(result, dir + "/" + entry)
        if "rttjobinfo.xml" in entries:
            for castorfile in self.extractCastorPath(dir + "/rttjobinfo.xml", pattern):
                self.addNew(result, castorfile)
        return result

    def findFiles(self, pattern):
        """Pair reference and validation files matching pattern.

        Runs getCommonChains() first if no chains are known yet.

        Returns:
            Dictionary mapping chain name to a tuple of
            (reference file, validation file) pairs with equal file names.
        """
        result = dict()
        if len(self._commonDirs) == 0:
            self.getCommonChains()

        for name, tcis in self._commonDirs.items():
            if len(tcis) != 2:
                # e.g. entries left by getChains(): no validation partner
                continue
            ref, val = tcis
            reffiles = self.findFilesInDir(ref.directory, pattern)
            valfiles = self.findFilesInDir(val.directory, pattern)

            # First validation file seen for each file name
            valByName = {}
            for vf in valfiles:
                valByName.setdefault(vf.split("/")[-1], vf)

            pairs = ()
            for rf in reffiles:
                rfN = rf.split("/")[-1]
                if rfN in valByName:
                    pairs += ((rf, valByName[rfN]),)
            result[name] = pairs
        return result

    def extractCastorPath(self, rttxmlfile, pattern):
        """Return the archive paths in an rttjobinfo.xml that match pattern.

        For each <archivefile> element the path is the text of its first
        <destination> child if there is one; otherwise the element's text is
        read as a single-quoted dictionary and its 'src' entry is used.

        Parameters:
            rttxmlfile: Path of the XML file.
            pattern: Compiled regular expression.
        """
        res = []
        dom = parse(rttxmlfile)
        try:
            for af in dom.getElementsByTagName("archivefile"):
                cpEle = af.getElementsByTagName("destination")
                if cpEle:
                    castorpath = cpEle[0].childNodes[0].data.strip()
                else:
                    s = af.firstChild.nodeValue
                    # Turn the single-quoted dictionary text into valid JSON
                    json_acceptable_string = s.replace("'", "\"")
                    castorpath = json.loads(json_acceptable_string)['src']
                if pattern.search(castorpath):
                    res.append(castorpath)
        finally:
            dom.unlink()  # break reference cycles of the DOM tree
        return res
266
267
268
# Command-line entry point: print the reference/validation file pairs of all
# compatible TCT chains for two (build, nightly) combinations.
if __name__ == "__main__":
    def usage():
        """Print a short usage message."""
        print ("Find compatible pairs of TCT files")
        print ("Usage example")
        print (sys.argv[0],"15.6.X.Y rel_1 15.6.X.Y-VAL rel_2 'myTAGCOMM.*root'")

    # Expected arguments: ref build, ref nightly, val build, val nightly, regex
    if len(sys.argv) != 6:
        usage()
        sys.exit(-1)

    rPath = tctPath(sys.argv[1], sys.argv[2])
    vPath = tctPath(sys.argv[3], sys.argv[4])

    if not os.access(rPath, os.R_OK):
        print ("Can't access output of reference TCT at",rPath)
        sys.exit(-1)

    if not os.access(vPath, os.R_OK):
        print ("Can't access output of validation TCT at",vPath)
        sys.exit(-1)

    pattern = re.compile(sys.argv[5])
    ff = findTCTFiles(rPath, vPath)

    # Populate the list of compatible chains before looking for files
    ff.getCommonChains()

    res = ff.findFiles(pattern)
    for name, fl in res.items():
        print (name)
        for (r, v) in fl:
            print ("Ref:",r)
            print ("Val:",v)
301
302
303
__init__(self, dir, log, runEventIn=())
#exits also in reference set _commonDirs
addNew(self, fileList, file)
extractCastorPath(self, rttxmlfile, pattern)
__init__(self, rDir, vDir, checkAge=False)
findFilesInDir(self, dir, filename)
findBetween(self, s, first, last)
std::map< std::string, std::string > parse(const std::string &list)
tctPath(build, rel)