ATLAS Offline Software
trfValidation.py
1 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
2 
3 
14 import fnmatch
15 import os
16 import re
17 
18 from subprocess import Popen, STDOUT, PIPE
19 
20 import logging
21 msg = logging.getLogger(__name__)
22 
23 from PyUtils import RootUtils
24 
25 from PyJobTransforms.trfExeStepTools import getExecutorStepEventCounts
26 from PyJobTransforms.trfExitCodes import trfExit
27 from PyJobTransforms.trfLogger import stdLogLevels
28 from PyJobTransforms.trfArgClasses import argFile
29 
30 import PyJobTransforms.trfExceptions as trfExceptions
31 import PyJobTransforms.trfUtils as trfUtils
32 
33 
34 # @brief Check a Pool file for corruption, return N events or -1 if access problem, -2 if corruption
35 def corruptionTestPool(filename, verbose=False):
36  if not os.access(filename, os.R_OK):
37  msg.info("ERROR can't access file %s", filename)
38  return -1
39 
40  ROOT = RootUtils.import_root()
41 
42  try:
43  f = ROOT.TFile.Open(filename)
44  except Exception:
45  msg.info("Can't open file %s", filename)
46  return -1
47 
48  nEvents = None
49 
50  keys = f.GetListOfKeys()
51  for k in keys:
52  try:
53  tn = k.GetName()
54  t = f.Get(tn)
55  if not isinstance(t, ROOT.TTree): continue  # skip keys that are not TTrees
56  except Exception:
57  msg.info("Can't get tree %s from file %s", tn, filename)
58  f.Close()
59  return -1
60 
61  if (verbose): msg.info("Working on tree %s", tn)
62  n = t.GetEntriesFast()
63  for i in range(n):
64  s = t.GetEntry(i)
65  if s <= 0:
66  msg.info("Tree %s: Found corruption in event %i", i, n)
67  f.Close()
68  return -2
69  else:
70  if verbose and i > 0 and i % 100 == 0:
71  msg.info("Checking event %s", i)
72  msg.info("Tree %s: %i event(s) ok", tn, n)
73 
74  # Use CollectionTree to determine the number of events
75  if tn == 'CollectionTree':
76  nEvents = n
77  pass # end of loop over trees
78 
79  f.Close()
80  msg.info("ROOT file %s looks ok", filename)
81  if nEvents is None:
82  msg.info("Failed to determine number of events in file %s. No tree named 'CollectionTree'", filename)
83  return 0
84  return nEvents
85 
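# A minimal usage sketch of corruptionTestPool (illustrative only, not part of this
# module; the filename is hypothetical). A negative return value flags an access
# problem (-1) or corruption (-2), otherwise the CollectionTree event count is returned:
#
#   nEvents = corruptionTestPool('myAOD.pool.root', verbose=True)
#   if nEvents < 0:
#       print('Validation failed with code', nEvents)
#   else:
#       print('File is OK with', nEvents, 'event(s)')
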
86 # @brief Check BS file for corruption
87 def corruptionTestBS(filename):
88  # First try AtlListBSEvents -c %filename:
89  cmd = ['AtlListBSEvents', '-c', filename]
90  p = Popen(cmd, shell=False, stdout=PIPE, stderr=STDOUT, close_fds=True)
91  while p.poll() is None:
92  line = p.stdout.readline()
93  if line:
94  msg.info("AtlListBSEvents Report: %s", line.strip())
95  rc = p.returncode
96  return rc
97 
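# Similarly, a minimal sketch for corruptionTestBS (illustrative only; the filename is
# hypothetical). The function returns the exit code of AtlListBSEvents, so 0 means the
# bytestream file scanned cleanly:
#
#   rc = corruptionTestBS('data_test.RAW')
#   if rc != 0:
#       print('Bytestream corruption check failed, rc =', rc)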
98 
99 
100 class ignorePatterns(object):
101 
102 
106  def __init__(self, files=['atlas_error_mask.db'], extraSearch = []):
107  # Setup structured search patterns
108  self._structuredPatterns = []
109  self._initalisePatterns(files)
110 
111  # Setup extra search patterns
112  self._searchPatterns = []
113  self._initialiseSerches(extraSearch)
114 
115  @property
116  def structuredPatterns(self):
117  return self._structuredPatterns
118 
119  @property
120  def searchPatterns(self):
121  return self._searchPatterns
122 
123  def _initalisePatterns(self, files):
124  for patternFile in files:
125  if patternFile == "None":
126  continue
127  fullName = trfUtils.findFile(os.environ['DATAPATH'], patternFile)
128  if not fullName:
129  msg.warning('Error pattern file {0} could not be found in DATAPATH'.format(patternFile))
130  continue
131  try:
132  with open(fullName) as patternFileHandle:
133  msg.debug('Opened error file {0} from here: {1}'.format(patternFile, fullName))
134 
135  for line in patternFileHandle:
136  line = line.strip()
137  if line.startswith('#') or line == '':
138  continue
139  try:
140  # N.B. At the moment release matching is not supported!
141  (who, level, message) = [ s.strip() for s in line.split(',', 2) ]
142  if who == "":
143  # Blank means match anything, so make it so...
144  who = "."
145  reWho = re.compile(who)
146  reMessage = re.compile(message)
147  except ValueError:
148  msg.warning('Could not parse this line as a valid error pattern: {0}'.format(line))
149  continue
150  except re.error as e:
151  msg.warning('Could not parse valid regexp from {0}: {1}'.format(message, e))
152  continue
153 
154  msg.debug('Successfully parsed: who={0}, level={1}, message={2}'.format(who, level, message))
155 
156  self._structuredPatterns.append({'service': reWho, 'level': level, 'message': reMessage})
157 
158  except OSError as e:
159  (errno, errMsg) = e.args
160  msg.warning('Failed to open error pattern file {0}: {1} ({2})'.format(fullName, errMsg, errno))
161 
162 
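# For illustration (a hypothetical entry, not taken from atlas_error_mask.db): each
# non-comment line of an error pattern file is "who, level, message", where "who" and
# "message" are regular expressions and a blank "who" matches any service, e.g.
#
#   AthenaEventLoopMgr, ERROR, .*Timeout while waiting.*
#
# would cause matching ERROR messages from AthenaEventLoopMgr to be ignored.
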
163  def _initialiseSerches(self, searchStrings=[]):
164  for string in searchStrings:
165  try:
166  self._searchPatterns.append(re.compile(string))
167  msg.debug('Successfully parsed additional logfile search string: {0}'.format(string))
168  except re.error as e:
169  msg.warning('Could not parse valid regexp from {0}: {1}'.format(string, e))
170 
171 
172 
173 
176 class logFileReport(object):
177  def __init__(self, logfile=None, msgLimit=10, msgDetailLevel=stdLogLevels['ERROR']):
178 
179  # We can have one logfile or a set
180  if isinstance(logfile, str):
181  self._logfile = [logfile, ]
182  else:
183  self._logfile = logfile
184 
185  self._msgLimit = msgLimit
186  self._msgDetails = msgDetailLevel
187  self._re = None
188 
189  if logfile:
190  self.scanLogFile(logfile)
191 
192  def resetReport(self):
193  pass
194 
195  def scanLogFile(self):
196  pass
197 
198  def worstError(self):
199  pass
200 
201  def firstError(self):
202  pass
203 
204  def __str__(self):
205  return ''
206 
207 
208 
211 class athenaLogFileReport(logFileReport):
212 
216  def __init__(self, logfile, substepName=None, msgLimit=10, msgDetailLevel=stdLogLevels['ERROR'], ignoreList=None):
217  if ignoreList:
218  self._ignoreList = ignoreList
219  else:
220  self._ignoreList = ignorePatterns()
221 
222 
227  self._regExp = re.compile(r'(?P<service>[^\s]+\w)(.*)\s+(?P<level>' + '|'.join(stdLogLevels) + r')\s+(?P<message>.*)')
228 
229  self._metaPat = re.compile(r"MetaData:\s+(.*?)\s*=\s*(.*)$")
230  self._metaData = {}
231  self._substepName = substepName
232  self._msgLimit = msgLimit
233 
234  self.resetReport()
235 
236  super(athenaLogFileReport, self).__init__(logfile, msgLimit, msgDetailLevel)
237 
238 
240  @property
241  def python(self):
242  errorDict = {'countSummary': {}, 'details': {}}
243  for level, count in self._levelCounter.items():
244  errorDict['countSummary'][level] = count
245  if self._levelCounter[level] > 0 and len(self._errorDetails[level]) > 0:
246  errorDict['details'][level] = []
247  for error in self._errorDetails[level]:
248  errorDict['details'][level].append(error)
249  return errorDict
250 
251  def resetReport(self):
252  self._levelCounter = {}
253  for level in list(stdLogLevels) + ['UNKNOWN', 'IGNORED']:
254  self._levelCounter[level] = 0
255 
256  self._errorDetails = {}
257  for level in self._levelCounter:
258  self._errorDetails[level] = []
259  # Format:
260  # List of dicts {'message': errMsg, 'firstLine': lineNo, 'count': N}
261  self._dbbytes = 0
262  self._dbtime = 0.0
263 
264 
266  def knowledgeFileHandler(self, knowledgefile):
267  # load abnormal/error line(s) from the knowledge file(s)
268  linesList = []
269  fullName = trfUtils.findFile(os.environ['DATAPATH'], knowledgefile)
270  if not fullName:
271  msg.warning('Knowledge file {0} could not be found in DATAPATH'.format(knowledgefile))
272  try:
273  with open(fullName) as knowledgeFileHandle:
274  msg.debug('Opened knowledge file {0} from here: {1}'.format(knowledgefile, fullName))
275 
276  for line in knowledgeFileHandle:
277  if line.startswith('#') or line == '' or line =='\n':
278  continue
279  line = line.rstrip('\n')
280  linesList.append(line)
281  except OSError as e:
282  msg.warning('Failed to open knowledge file {0}: {1}'.format(fullName, e))
283  return linesList
284 
285  def scanLogFile(self, resetReport=False):
286  nonStandardErrorsList = self.knowledgeFileHandler('nonStandardErrors.db')
287 
288  if resetReport:
289  self.resetReport()
290 
291  for log in self._logfile:
292  msg.debug('Now scanning logfile {0}'.format(log))
293  seenNonStandardError = ''
294  # N.B. Use the generator so that lines can be grabbed by subroutines, e.g., core dump svc reporter
295  try:
296  myGen = trfUtils.lineByLine(log, substepName=self._substepName)
297  except IOError as e:
298  msg.error('Failed to open transform logfile {0}: {1:s}'.format(log, e))
299  # Return this as a small report
300  self._levelCounter['ERROR'] = 1
301  self._errorDetails['ERROR'] = [{'message': str(e), 'firstLine': 0, 'count': 1}]
302  return
303  for line, lineCounter in myGen:
304  m = self._metaPat.search(line)
305  if m is not None:
306  key, value = m.groups()
307  self._metaData[key] = value
308 
309  m = self._regExp.match(line)
310  if m is None:
311  # We didn't manage to get a recognised standard line from the file
312  # But we can check for certain other interesting things, like core dumps
313  if 'Core dump from CoreDumpSvc' in line:
314  msg.warning('Detected CoreDumpSvc report - activating core dump svc grabber')
315  self.coreDumpSvcParser(log, myGen, line, lineCounter)
316  continue
317  # Add the G4 exception parsers
318  if 'G4Exception-START' in line:
319  msg.warning('Detected G4 exception report - activating G4 exception grabber')
320  self.g4ExceptionParser(myGen, line, lineCounter, 40)
321  continue
322  if '*** G4Exception' in line:
323  msg.warning('Detected G4 9.4 exception report - activating G4 exception grabber')
324  self.g494ExceptionParser(myGen, line, lineCounter)
325  continue
326  # Add the python exception parser
327  if 'Shortened traceback (most recent user call last)' in line:
328  msg.warning('Detected python exception - activating python exception grabber')
329  self.pythonExceptionParser(log, myGen, line, lineCounter)
330  continue
331  # Add parser for missed bad_alloc
332  if 'terminate called after throwing an instance of \'std::bad_alloc\'' in line:
333  msg.warning('Detected bad_alloc!')
334  self.badAllocExceptionParser(myGen, line, lineCounter)
335  continue
336  # Parser for ROOT reporting a stale file handle (see ATLASG-448)
337  # Amendment: Generalize the search (see ATLASRECTS-7121)
338  if 'Error in <TFile::ReadBuffer>' in line:
339  self.rootSysErrorParser(myGen, line, lineCounter)
340  continue
341 
342  if 'Error in <TFile::WriteBuffer>' in line:
343  self.rootSysErrorParser(myGen, line, lineCounter)
344  continue
345  # Check if the line is among the non-standard logging errors from the knowledge file
346  if any(line in l for l in nonStandardErrorsList):
347  seenNonStandardError = line
348  continue
349 
350  msg.debug('Non-standard line in %s: %s', log, line)
351  self._levelCounter['UNKNOWN'] += 1
352  continue
353 
354  # Line was matched successfully
355  fields = {}
356  for matchKey in ('service', 'level', 'message'):
357  fields[matchKey] = m.group(matchKey)
358  msg.debug('Line parsed as: {0}'.format(fields))
359 
360  # Check this is not in our ignore list
361  ignoreFlag = False
362  for ignorePat in self._ignoreList.structuredPatterns:
363  serviceMatch = ignorePat['service'].match(fields['service'])
364  levelMatch = (ignorePat['level'] == "" or ignorePat['level'] == fields['level'])
365  messageMatch = ignorePat['message'].match(fields['message'])
366  if serviceMatch and levelMatch and messageMatch:
367  msg.info('Error message "{0}" was ignored at line {1} (structured match)'.format(line, lineCounter))
368  ignoreFlag = True
369  break
370  if ignoreFlag is False:
371  for searchPat in self._ignoreList.searchPatterns:
372  if searchPat.search(line):
373  msg.info('Error message "{0}" was ignored at line {1} (search match)'.format(line, lineCounter))
374  ignoreFlag = True
375  break
376  if ignoreFlag:
377  # Got an ignore - record this message under the special IGNORED level
378  fields['level'] = 'IGNORED'
379  else:
380  # Some special handling for specific errors (maybe generalise this if
381  # there end up being too many special cases)
382  # Upgrade bad_alloc to CATASTROPHE to allow for better automated handling of
383  # jobs that run out of memory
384  if 'std::bad_alloc' in fields['message']:
385  fields['level'] = 'CATASTROPHE'
386 
387  # concatenate the seen non-standard logging error to the FATAL
388  if fields['level'] == 'FATAL':
389  if seenNonStandardError:
390  line += '; ' + seenNonStandardError
391 
392  # Count this error
393  self._levelCounter[fields['level']] += 1
394 
395  # Record some error details
396  # N.B. We record 'IGNORED' errors as these really should be flagged for fixing
397  if fields['level'] == 'IGNORED' or stdLogLevels[fields['level']] >= self._msgDetails:
398  if self._levelCounter[fields['level']] <= self._msgLimit:
399  detailsHandled = False
400  for seenError in self._errorDetails[fields['level']]:
401  if seenError['message'] == line:
402  seenError['count'] += 1
403  detailsHandled = True
404  break
405  if detailsHandled is False:
406  self._errorDetails[fields['level']].append({'message': line, 'firstLine': lineCounter, 'count': 1})
407  elif self._levelCounter[fields['level']] == self._msgLimit + 1:
408  msg.warning("Found message number {0} at level {1} - this and further messages will be supressed from the report".format(self._levelCounter[fields['level']], fields['level']))
409  else:
410  # Overcounted
411  pass
412  if 'Total payload read from COOL' in fields['message']:
413  msg.debug("Found COOL payload information at line {0}".format(line))
414  a = re.match(r'(\D+)(?P<bytes>\d+)(\D+)(?P<time>\d+[.]?\d*)(\D+)', fields['message'])
415  self._dbbytes += int(a.group('bytes'))
416  self._dbtime += float(a.group('time'))
417 
418 
419 
420  def dbMonitor(self):
421  return {'bytes' : self._dbbytes, 'time' : self._dbtime} if self._dbbytes > 0 or self._dbtime > 0 else None
422 
423 
424  def worstError(self):
425  worst = stdLogLevels['DEBUG']
426  worstName = 'DEBUG'
427  for lvl, count in self._levelCounter.items():
428  if count > 0 and stdLogLevels.get(lvl, 0) > worst:
429  worstName = lvl
430  worst = stdLogLevels[lvl]
431  if len(self._errorDetails[worstName]) > 0:
432  firstError = self._errorDetails[worstName][0]
433  else:
434  firstError = None
435 
436  return {'level': worstName, 'nLevel': worst, 'firstError': firstError}
437 
438 
439 
440  def firstError(self, floor='ERROR'):
441  firstLine = firstError = None
442  firstLevel = stdLogLevels[floor]
443  firstName = floor
444  for lvl, count in self._levelCounter.items():
445  if (count > 0 and stdLogLevels.get(lvl, 0) >= stdLogLevels[floor] and
446  (firstError is None or self._errorDetails[lvl][0]['firstLine'] < firstLine)):
447  firstLine = self._errorDetails[lvl][0]['firstLine']
448  firstLevel = stdLogLevels[lvl]
449  firstName = lvl
450  firstError = self._errorDetails[lvl][0]
451 
452  return {'level': firstName, 'nLevel': firstLevel, 'firstError': firstError}
453 
454 
455  def moreDetails(self, log, firstline, firstLineCount, knowledgeFile, offset=0):
456  # Look for "abnormal" and "last normal" line(s)
457  # Make a list of last e.g. 50 lines before core dump
458  abnormalLinesList = self.knowledgeFileHandler(knowledgeFile)
459  linesToBeScanned = 50
460  seenAbnormalLines = []
461  abnormalLinesReport = {}
462  lastNormalLineReport = {}
463 
464  linesList = []
465  myGen = trfUtils.lineByLine(log)
466  for line, linecounter in myGen:
467  if linecounter in range(firstLineCount - linesToBeScanned, firstLineCount-offset):
468  linesList.append([linecounter, line])
469  elif linecounter == firstLineCount:
470  break
471 
472  for linecounter, line in reversed(linesList):
473  if re.findall(r'|'.join(abnormalLinesList), line):
474  seenLine = False
475  for dic in seenAbnormalLines:
476  # count repetitions or similar (e.g. first 15 char) abnormal lines
477  if dic['message'] == line or dic['message'][0:15] == line[0:15]:
478  dic['count'] += 1
479  seenLine = True
480  break
481  if seenLine is False:
482  seenAbnormalLines.append({'message': line, 'firstLine': linecounter, 'count': 1})
483  else:
484  if line != '':
485  lastNormalLineReport = {'message': line, 'firstLine': linecounter, 'count': 1}
486  break
487  else:
488  continue
489 
490  # Write the list of abnormal lines into the abnormalLinesReport dictionary
491  # The keys of each abnormal line have a number suffix starting with 0
492  # e.g., the first abnormal line's keys are: {'message0', 'firstLine0', 'count0'}
493 
494  for a in range(len(seenAbnormalLines)):
495  abnormalLinesReport.update({'message{0}'.format(a): seenAbnormalLines[a]['message'], 'firstLine{0}'.format(a): seenAbnormalLines[a]['firstLine'],
496  'count{0}'.format(a): seenAbnormalLines[a]['count']})
497 
498  return {'abnormalLines': abnormalLinesReport, 'lastNormalLine': lastNormalLineReport}
499 
500 
501 
507  def coreDumpSvcParser(self, log, lineGenerator, firstline, firstLineCount):
508  _eventCounter = _run = _event = _currentAlgorithm = _functionLine = _currentFunction = None
509  coreDumpReport = 'Core dump from CoreDumpSvc'
510  # Number of lines to ignore above 'core dump' when looking for abnormal lines
511  offset = 1
512  coreDumpDetailsReport = {}
513 
514  for line, linecounter in lineGenerator:
515  m = self._regExp.match(line)
516  if m is None:
517  if 'Caught signal 11(Segmentation fault)' in line:
518  coreDumpReport = 'Segmentation fault'
519  if 'Event counter' in line:
520  _eventCounter = line
521 
522  #Lookup: 'EventID: [Run,Evt,Lumi,Time,BunchCross,DetMask] = [267599,7146597,1,1434123751:0,0,0x0,0x0,0x0]'
523  if 'EventID' in line:
524  match = re.findall(r'\[.*?\]', line)
525  if match and len(match) >= 2: # Assuming the line contains at least one key-value pair.
526  brackets = "[]"
527  commaDelimer = ','
528  keys = (match[0].strip(brackets)).split(commaDelimer)
529  values = (match[1].strip(brackets)).split(commaDelimer)
530 
531  if 'Run' in keys:
532  _run = 'Run: ' + values[keys.index('Run')]
533 
534  if 'Evt' in keys:
535  _event = 'Evt: ' + values[keys.index('Evt')]
536 
537  if 'Current algorithm' in line:
538  _currentAlgorithm = line
539  if '<signal handler called>' in line:
540  _functionLine = linecounter+1
541  if _functionLine and linecounter == _functionLine:
542  if ' in ' in line:
543  _currentFunction = 'Current Function: ' + line.split(' in ')[1].split()[0]
544  else:
545  _currentFunction = 'Current Function: ' + line.split()[1]
546  else:
547  # Ideally we would push this line back into the generator to be
548  # reparsed in the normal way (this would need the generator to be
549  # wrapped in a class that supports a pushback onto an internal
550  # FIFO stack), e.g.
551  # lineGenerator.pushback(line)
552  break
553  _eventCounter = 'Event counter: unknown' if not _eventCounter else _eventCounter
554  _run = 'Run: unknown' if not _run else _run
555  _event = 'Evt: unknown' if not _event else _event
556  _currentAlgorithm = 'Current algorithm: unknown' if not _currentAlgorithm else _currentAlgorithm
557  _currentFunction = 'Current Function: unknown' if not _currentFunction else _currentFunction
558  coreDumpReport = '{0}: {1}; {2}; {3}; {4}; {5}'.format(coreDumpReport, _eventCounter, _run, _event, _currentAlgorithm, _currentFunction)
559 
560  coreDumpDetailsReport = self.moreDetails(log, firstline, firstLineCount, 'knowledgeFile.db', offset)
561  abnormalLines = coreDumpDetailsReport['abnormalLines']
562 
563  # concatenate an extract of first seen abnormal line to the core dump message
564  if 'message0' in abnormalLines.keys():
565  coreDumpReport += '; Abnormal line seen just before core dump: ' + abnormalLines['message0'][0:30] + '...[truncated] ' + '(see the jobReport)'
566 
567  # Core dumps are always fatal...
568  msg.debug('Identified core dump - adding to error detail report')
569  self._levelCounter['FATAL'] += 1
570  self._errorDetails['FATAL'].append({'moreDetails': coreDumpDetailsReport, 'message': coreDumpReport, 'firstLine': firstLineCount, 'count': 1})
571 
572 
573  def g494ExceptionParser(self, lineGenerator, firstline, firstLineCount):
574  g4Report = firstline
575  g4lines = 1
576  if 'Aborting execution' not in g4Report:
577  for line, linecounter in lineGenerator:
578  g4Report += os.linesep + line
579  g4lines += 1
580  # Test for the closing string
581  if '*** ' in line:
582  break
583  if g4lines >= 25:
584  msg.warning('G4 exception closing string not found within {0} log lines of line {1}'.format(g4lines, firstLineCount))
585  break
586 
587  # G4 exceptions can be fatal or they can be warnings...
588  msg.debug('Identified G4 exception - adding to error detail report')
589  if "just a warning" in g4Report:
590  if self._levelCounter['WARNING'] <= self._msgLimit:
591  self._levelCounter['WARNING'] += 1
592  self._errorDetails['WARNING'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1})
593  elif self._levelCounter['WARNING'] == self._msgLimit + 1:
594  msg.warning("Found message number {0} at level WARNING - this and further messages will be supressed from the report".format(self._levelCounter['WARNING']))
595  else:
596  self._levelCounter['FATAL'] += 1
597  self._errorDetails['FATAL'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1})
598 
599  def g4ExceptionParser(self, lineGenerator, firstline, firstLineCount, g4ExceptionLineDepth):
600  g4Report = firstline
601  g4lines = 1
602  for line, linecounter in lineGenerator:
603  g4Report += os.linesep + line
604  g4lines += 1
605  # Test for the closing string
606  if 'G4Exception-END' in line:
607  break
608  if g4lines >= g4ExceptionLineDepth:
609  msg.warning('G4 exception closing string not found within {0} log lines of line {1}'.format(g4lines, firstLineCount))
610  break
611 
612  # G4 exceptions can be fatal or they can be warnings...
613  msg.debug('Identified G4 exception - adding to error detail report')
614  if "-------- WWWW -------" in g4Report:
615  if self._levelCounter['WARNING'] <= self._msgLimit:
616  self._levelCounter['WARNING'] += 1
617  self._errorDetails['WARNING'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1})
618  elif self._levelCounter['WARNING'] == self._msgLimit + 1:
619  msg.warning("Found message number {0} at level WARNING - this and further messages will be supressed from the report".format(self._levelCounter['WARNING']))
620  else:
621  self._levelCounter['FATAL'] += 1
622  self._errorDetails['FATAL'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1})
623 
624 
625  def pythonExceptionParser(self, log, lineGenerator, firstline, firstLineCount):
626  pythonExceptionReport = ""
627  lastLine = firstline
628  lastLine2 = firstline
629  pythonErrorLine = firstLineCount
630  pyLines = 1
631  for line, linecounter in lineGenerator:
632  if 'Py:Athena' in line and 'INFO leaving with code' in line:
633  if len(lastLine)> 0:
634  pythonExceptionReport = lastLine
635  pythonErrorLine = linecounter-1
636  else: # Sometimes there is a blank line after the exception
637  pythonExceptionReport = lastLine2
638  pythonErrorLine = linecounter-2
639  break
640  if pyLines >= 25:
641  msg.warning('Could not identify python exception correctly scanning {0} log lines after line {1}'.format(pyLines, firstLineCount))
642  pythonExceptionReport = "Unable to identify specific exception"
643  pythonErrorLine = firstLineCount
644  break
645  lastLine2 = lastLine
646  lastLine = line
647  pyLines += 1
648 
649  pythonExceptionDetailsReport = self.moreDetails(log, firstline, firstLineCount, 'knowledgeFile.db')
650  abnormalLines = pythonExceptionDetailsReport['abnormalLines']
651 
652  # concatenate an extract of first seen abnormal line to pythonExceptionReport
653  if 'message0' in abnormalLines.keys():
654  pythonExceptionReport += '; Abnormal line seen just before python exception: ' + abnormalLines['message0'][0:30] + '...[truncated] ' + '(see the jobReport)'
655 
656  msg.debug('Identified python exception - adding to error detail report')
657  self._levelCounter['FATAL'] += 1
658  self._errorDetails['FATAL'].append({'moreDetails': pythonExceptionDetailsReport, 'message': pythonExceptionReport, 'firstLine': pythonErrorLine, 'count': 1})
659 
660 
661  def badAllocExceptionParser(self, lineGenerator, firstline, firstLineCount):
662  badAllocExceptionReport = 'terminate after \'std::bad_alloc\'.'
663 
664  msg.debug('Identified bad_alloc - adding to error detail report')
665  self._levelCounter['CATASTROPHE'] += 1
666  self._errorDetails['CATASTROPHE'].append({'message': badAllocExceptionReport, 'firstLine': firstLineCount, 'count': 1})
667 
668  def rootSysErrorParser(self, lineGenerator, firstline, firstLineCount):
669  msg.debug('Identified ROOT IO problem - adding to error detail report')
670  self._levelCounter['FATAL'] += 1
671  self._errorDetails['FATAL'].append({'message': firstline, 'firstLine': firstLineCount, 'count': 1})
672 
673  def __str__(self):
674  return str(self._levelCounter) + str(self._errorDetails)
675 
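# A minimal usage sketch of athenaLogFileReport (illustrative only; 'athena_stdout.log'
# is a hypothetical logfile name):
#
#   report = athenaLogFileReport('athena_stdout.log')
#   print(report.worstError())   # e.g. {'level': 'FATAL', 'nLevel': ..., 'firstError': {...}}
#   print(report.python)         # summary dictionary suitable for the job report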
676 
677 class scriptLogFileReport(logFileReport):
678  def __init__(self, logfile=None, msgLimit=200, msgDetailLevel=stdLogLevels['ERROR']):
679  self._levelCounter = {}
680  self._errorDetails = {}
681  self.resetReport()
682  super(scriptLogFileReport, self).__init__(logfile, msgLimit, msgDetailLevel)
683 
684  def resetReport(self):
685  self._levelCounter.clear()
686  for level in list(stdLogLevels) + ['UNKNOWN', 'IGNORED']:
687  self._levelCounter[level] = 0
688 
689  self._errorDetails.clear()
690  for level in self._levelCounter: # List of dicts {'message': errMsg, 'firstLine': lineNo, 'count': N}
691  self._errorDetails[level] = []
692 
693  def scanLogFile(self, resetReport=False):
694  if resetReport:
695  self.resetReport()
696 
697  for log in self._logfile:
698  msg.info('Scanning logfile {0}'.format(log))
699  try:
700  myGen = trfUtils.lineByLine(log)
701  except IOError as e:
702  msg.error('Failed to open transform logfile {0}: {1:s}'.format(log, e))
703  # Return this as a small report
704  self._levelCounter['ERROR'] = 1
705  self._errorDetails['ERROR'] = [{'message': str(e), 'firstLine': 0, 'count': 1}]
706  return
707 
708  for line, lineCounter in myGen:
709  # TODO: This implementation currently only scans for ROOT SysErrors.
710  # A general solution would be to have a common error parser for all system-level
711  # errors, which are also handled by athenaLogFileReport.
712  if 'Error in <TFile::ReadBuffer>' in line or \
713  'Error in <TFile::WriteBuffer>' in line:
714  self.rootSysErrorParser(line, lineCounter)
715 
716  # Return the worst error found in the logfile (first error of the most serious type)
717  def worstError(self):
718  worstlevelName = 'DEBUG'
719  worstLevel = stdLogLevels[worstlevelName]
720  for levelName, count in self._levelCounter.items():
721  if count > 0 and stdLogLevels.get(levelName, 0) > worstLevel:
722  worstlevelName = levelName
723  worstLevel = stdLogLevels[levelName]
724 
725  if len(self._errorDetails[worstlevelName]) > 0:
726  firstError = self._errorDetails[worstlevelName][0]
727  else:
728  firstError = None
729 
730  return {'level': worstlevelName, 'nLevel': worstLevel, 'firstError': firstError}
731 
732  def __str__(self):
733  return str(self._levelCounter) + str(self._errorDetails)
734 
735  def rootSysErrorParser(self, line, lineCounter):
736  msg.debug('Identified ROOT IO problem - adding to error detail report')
737  self._levelCounter['FATAL'] += 1
738  self._errorDetails['FATAL'].append({'message': line, 'firstLine': lineCounter, 'count': 1})
739 
740 
743 def returnIntegrityOfFile(file, functionName):
744  try:
745  import PyJobTransforms.trfFileValidationFunctions as trfFileValidationFunctions
746  except Exception as exception:
747  msg.error('Failed to import module PyJobTransforms.trfFileValidationFunctions with error {error}'.format(error = exception))
748  raise
749  validationFunction = getattr(trfFileValidationFunctions, functionName)
750  return validationFunction(file)
751 
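# A minimal usage sketch (illustrative only; 'returnIntegrityOfPOOLFile' is assumed to
# be one of the validation functions provided by trfFileValidationFunctions, and the
# filename is hypothetical):
#
#   result = returnIntegrityOfFile('myAOD.pool.root', 'returnIntegrityOfPOOLFile')
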
752 
753 
756 def performStandardFileValidation(dictionary, io, parallelMode = False, multithreadedMode=False):
757  if parallelMode is False:
758  msg.info('Starting legacy (serial) file validation')
759  for (key, arg) in dictionary.items():
760  if not isinstance(arg, argFile):
761  continue
762  if not arg.io == io:
763  continue
764  if arg.auxiliaryFile:
765  continue
766 
767  msg.info('Validating data type %s...', key)
768 
769  for fname in arg.value:
770  msg.info('Validating file %s...', fname)
771 
772  if io == "output":
773  msg.info('{0}: Testing corruption...'.format(fname))
774  if multithreadedMode:
775  os.environ['TRF_MULTITHREADED_VALIDATION']='TRUE'
776  if arg.getSingleMetadata(fname, 'integrity') is True:
777  msg.info('Corruption test passed.')
778  elif arg.getSingleMetadata(fname, 'integrity') is False:
779  msg.error('Corruption test failed.')
780  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s did not pass corruption test' % fname)
781  elif arg.getSingleMetadata(fname, 'integrity') == 'UNDEFINED':
782  msg.info('No corruption test defined.')
783  elif arg.getSingleMetadata(fname, 'integrity') is None:
784  msg.error('Could not check for file integrity')
785  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s might be missing' % fname)
786  else:
787  msg.error('Unknown rc from corruption test.')
788  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s did not pass corruption test' % fname)
789 
790 
791  msg.info('{0}: Testing event count...'.format(fname))
792  if arg.getSingleMetadata(fname, 'nentries') is not None:
793  msg.info('Event counting test passed ({0!s} events).'.format(arg.getSingleMetadata(fname, 'nentries')))
794  else:
795  msg.error('Event counting test failed.')
796  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s did not pass corruption test' % fname)
797 
798 
799  msg.info('{0}: Checking if guid exists...'.format(fname))
800  if arg.getSingleMetadata(fname, 'file_guid') is None:
801  msg.error('Guid could not be determined.')
802  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s did not pass corruption test' % fname)
803  elif arg.getSingleMetadata(fname, 'file_guid') == 'UNDEFINED':
804  msg.info('Guid not defined.')
805  else:
806  msg.info('Guid is %s', arg.getSingleMetadata(fname, 'file_guid'))
807  msg.info('Stopping legacy (serial) file validation')
808  if parallelMode is True:
809  msg.info('Starting parallel file validation')
810  # Create lists of files and args. These lists are to be used with zip in
811  # order to check and update file integrity metadata as appropriate.
812  fileList = []
813  argList = []
814  # Create a list of the integrity functions for files.
815  integrityFunctionList = []
816  # Create a list for collation of file validation jobs for submission to
817  # the parallel job processor.
818  jobs = []
819  for (key, arg) in dictionary.items():
820  if not isinstance(arg, argFile):
821  continue
822  if not arg.io == io:
823  continue
824  msg.debug('Collating list of files for validation')
825  for fname in arg.value:
826  msg.debug('Appending file {fileName} to list of files for validation'.format(fileName = str(fname)))
827  # Append the current file to the file list.
828  fileList.append(fname)
829  # Append the current arg to the arg list.
830  argList.append(arg)
831  # Append the current integrity function name to the integrity
832  # function list if it exists. If it does not exist, raise an
833  # exception.
834  if arg.integrityFunction:
835  integrityFunctionList.append(arg.integrityFunction)
836  else:
837  msg.error('Validation function for file {fileName} not available for parallel file validation'.format(fileName = str(fname)))
838  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'Validation function for file %s not available for parallel file validation' % str(fname))
839  # Compose a job for validation of the current file using the
840  # appropriate validation function, which is derived from the
841  # associated data attribute arg.integrityFunction.
842  jobs.append(
843  trfUtils.Job(
844  name = "validation of file {fileName}".format(
845  fileName = str(fname)),
846  workFunction = returnIntegrityOfFile,
847  workFunctionKeywordArguments = {
848  'file': fname,
849  'functionName': arg.integrityFunction
850  },
851  workFunctionTimeout = 600
852  )
853  )
854  # Contain the file validation jobs in a job group for submission to the
855  # parallel job processor.
856  jobGroup1 = trfUtils.JobGroup(
857  name = "standard file validation",
858  jobs = jobs
859  )
860  # Prepare the parallel job processor.
861  parallelJobProcessor1 = trfUtils.ParallelJobProcessor()
862  # Submit the file validation jobs to the parallel job processor.
863  msg.info('Submitting file validation jobs to parallel job processor')
864  parallelJobProcessor1.submit(jobSubmission = jobGroup1)
865  resultsList = parallelJobProcessor1.getResults()
866  msg.info('Parallel file validation complete')
867  # Update file metadata with integrity results using the lists fileList,
868  # argList and resultsList.
869  msg.info('Processing file integrity results')
870  for currentFile, currentArg, currentIntegrityFunction, currentResult in zip(fileList, argList, integrityFunctionList, resultsList):
871  msg.info('{IO} file {fileName} has integrity status {integrityStatus} as determined by integrity function {integrityFunction}'.format(
872  IO = str(io),
873  fileName = str(currentFile),
874  integrityStatus = str(currentResult),
875  integrityFunction = str(currentIntegrityFunction)
876  ))
877  # If the first (Boolean) element of the result tuple for the current
878  # file is True, update the integrity metadata. If it is False, raise
879  # an exception.
880  if currentResult[0] is True:
881  msg.info('Updating integrity metadata for file {fileName}'.format(fileName = str(currentFile)))
882  currentArg._setMetadata(files=[currentFile,], metadataKeys={'integrity': currentResult[0]})
883  else:
884  exceptionMessage = "{IO} file validation failure on file {fileName} with integrity status {integrityStatus} as determined by integrity function {integrityFunction}".format(
885  IO = str(io),
886  fileName = str(currentFile),
887  integrityStatus = str(currentResult),
888  integrityFunction = str(currentIntegrityFunction)
889  )
890  msg.error("exception message: {exceptionMessage}".format(
891  exceptionMessage = exceptionMessage
892  ))
893  if io == 'input':
894  exitCodeName = 'TRF_INPUT_FILE_VALIDATION_FAIL'
895  elif io == 'output':
896  exitCodeName = 'TRF_OUTPUT_FILE_VALIDATION_FAIL'
897  raise trfExceptions.TransformValidationException(
898  trfExit.nameToCode(exitCodeName),
899  exceptionMessage
900  )
901  # Perform a check to determine if the file integrity metadata is
902  # correct.
903  if currentArg.getSingleMetadata(currentFile, metadataKey = 'integrity', populate = False) == currentResult[0]:
904  msg.debug("file integrity metadata update successful")
905  else:
906  msg.error("file integrity metadata update unsuccessful")
907  msg.info('Stopping parallel file validation')
908 
909 
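# A minimal usage sketch of performStandardFileValidation (illustrative only;
# 'dataDict' stands for a hypothetical mapping of data type names to argFile
# instances, as held in an executor configuration):
#
#   performStandardFileValidation(dataDict, io='output', parallelMode=False)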
910 
911 class eventMatch(object):
912 
913 
920  def __init__(self, executor, eventCountConf=None, eventCountConfOverwrite=False):
921  self._executor = executor
922  self._eventCount = None
923 
924 
935  simEventEff = 0.995
936  self._eventCountConf = {}
937  self._eventCountConf['EVNT'] = {'EVNT_MRG':"match", "HITS": simEventEff, "EVNT_TR": "filter", "DAOD_TRUTH*" : "match"}
938  self._eventCountConf['EVNT_TR'] = {'HITS': simEventEff}
939  self._eventCountConf['HITS'] = {'RDO':"match", 'HITS_RSM': simEventEff, "HITS_MRG":"match", 'HITS_FILT': simEventEff, "RDO_FILT": "filter", "DAOD_TRUTH*" : "match", "HIST_SIM" : "match"}
940  self._eventCountConf['BS'] = {'ESD': "match", 'DRAW_*':"filter", 'NTUP_*':"filter", "BS_MRG":"match", 'DESD*': "filter", 'AOD':"match", 'DAOD*':"filter", "DAOD_PHYS":"match", "DAOD_PHYSLITE":"match"}
941  self._eventCountConf['RDO*'] = {'ESD': "match", 'DRAW_*':"filter", 'NTUP_*':"filter", "RDO_MRG":"match", "RDO_TRIG":"match", 'AOD':"match", 'DAOD*':"filter", "DAOD_PHYS":"match", "DAOD_PHYSLITE":"match", "HIST_DIGI":"match"}
942  self._eventCountConf['ESD'] = {'ESD_MRG': "match", 'AOD':"match", 'DESD*':"filter", 'DAOD_*':"filter", 'NTUP_*':"filter", "DAOD_PHYS":"match", "DAOD_PHYSLITE":"match"}
943  self._eventCountConf['AOD'] = {'AOD_MRG' : "match", 'TAG':"match", "NTUP_*":"filter", "DAOD_*":"filter", "DAOD_PHYS":"match", "DAOD_PHYSLITE":"match"}
944  self._eventCountConf['AOD_MRG'] = {'TAG':"match"}
945  self._eventCountConf['DAOD_*'] = {'DAOD_*_MRG' : "match"}
946  self._eventCountConf['TAG'] = {'TAG_MRG': "match"}
947  self._eventCountConf['HIST'] = {'HIST_MRG': "match"}
948  self._eventCountConf['NTUP_COMMON'] = {'DNTUP*': "filter"}
949  self._eventCountConf['NTUP_*'] = {'NTUP_*_MRG': "match"}
950  # Next one comprises special data type names for smart merging of AthenaMP worker outputs
951  self._eventCountConf['POOL_MRG_INPUT'] = {'POOL_MRG_OUTPUT': "match"}
952 
953 
954  if eventCountConf:
955  if eventCountConfOverwrite is True:
956  self._eventCountConf = eventCountConf
957  else:
958  self._eventCountConf.update(eventCountConf)
959 
960  msg.debug('Event count check configuration is: {0}'.format(self._eventCountConf))
961  if hasattr(self._executor, 'name'):
962  msg.debug('Event count check ready for executor {0}'.format(self._executor.name))
963 
964  if self._executor is not None:
965  self.configureCheck(override=False)
966 
967  @property
968  def eventCount(self):
969  return self._eventCount
970 
971 
975  def configureCheck(self, override=False):
976  if override:
977  msg.info('Overriding check configuration with: {0}'.format(override))
978  self._inEventDict = override['inEventDict']
979  self._outEventDict = override['outEventDict']
980  self._skipEvents = override['skipEvents']
981  self._maxEvents = override['maxEvents']
982  self._evAccEff = override['evAccEff']
983  else:
984  # Input data from executor
985  self._inEventDict = {}
986  for dataTypeName in self._executor.input:
987  try:
988  self._inEventDict[dataTypeName] = self._executor.conf.dataDictionary[dataTypeName].nentries
989  msg.debug('Input data type {0} has {1} events'.format(dataTypeName, self._inEventDict[dataTypeName]))
990  except KeyError:
991  msg.warning('Found no dataDictionary entry for input data type {0}'.format(dataTypeName))
992 
993  # Output data from executor
994  self._outEventDict = {}
995  for dataTypeName in self._executor.output:
996  try:
997  self._outEventDict[dataTypeName] = self._executor.conf.dataDictionary[dataTypeName].nentries
998  msg.debug('Output data type {0} has {1} events'.format(dataTypeName, self._outEventDict[dataTypeName]))
999  except KeyError:
1000  msg.warning('Found no dataDictionary entry for output data type {0}'.format(dataTypeName))
1001 
1002  # Find if we have a skipEvents applied
1003  if "skipEvents" in self._executor.conf.argdict:
1004  self._skipEvents = self._executor.conf.argdict['skipEvents'].returnMyValue(exe=self._executor)
1005  else:
1006  self._skipEvents = None
1007 
1008  # Find if we have a maxEvents applied
1009  if "maxEvents" in self._executor.conf.argdict:
1010  self._maxEvents = self._executor.conf.argdict['maxEvents'].returnMyValue(exe=self._executor)
1011  if self._maxEvents == -1:
1012  self._maxEvents = None
1013  else:
1014  self._maxEvents = None
1015 
1016  # Executor substeps handling
1017  if self._executor.conf.totalExecutorSteps > 1 and self._executor.conf.executorStep < self._executor.conf.totalExecutorSteps - 1:
1018  executorEventCounts, executorEventSkips = getExecutorStepEventCounts(self._executor)
1019  self._maxEvents = executorEventCounts[self._executor.conf.executorStep]
1020  self._skipEvents = executorEventSkips[self._executor.conf.executorStep]
1021 
1022  # Global eventAcceptanceEfficiency set?
1023  if "eventAcceptanceEfficiency" in self._executor.conf.argdict:
1024  self._evAccEff = self._executor.conf.argdict['eventAcceptanceEfficiency'].returnMyValue(exe=self._executor)
1025  if (self._evAccEff is None):
1026  self._evAccEff = 0.99
1027  else:
1028  self._evAccEff = 0.99
1029 
1030  msg.debug("Event check conf: {0} {1}, {2}, {3}, {4}".format(self._inEventDict, self._outEventDict, self._skipEvents,
1031  self._maxEvents, self._evAccEff))
1032 
1033 
1034 
1035  def decide(self):
1036  # We have all that we need to proceed: input and output data, skip and max events plus any efficiency factor
1037  # So loop over the input and output data and make our checks
1038  for inData, neventsInData in self._inEventDict.items():
1039  if not isinstance(neventsInData, int):
1040  msg.warning('File size metadata for {inData} was not countable, found {neventsInData}. No event checks possible for this input data.'.format(inData=inData, neventsInData=neventsInData))
1041  continue
1042  if inData in self._eventCountConf:
1043  inDataKey = inData
1044  else:
1045  # OK, try a glob match in this case (YMMV)
1046  matchedInData = False
1047  for inDataKey in self._eventCountConf:
1048  if fnmatch.fnmatch(inData, inDataKey):
1049  msg.info("Matched input data type {inData} to {inDataKey} by globbing".format(inData=inData, inDataKey=inDataKey))
1050  matchedInData = True
1051  break
1052  if not matchedInData:
1053  msg.warning('No defined event count match for {inData} -> {outData}, so no check(s) possible in this case.'.format(inData=inData, outData=list(self._outEventDict)))
1054  continue
1055 
1056  # Now calculate the expected number of processed events for this input
1057  expectedEvents = neventsInData
1058  if self._skipEvents is not None and self._skipEvents > 0:
1059  expectedEvents -= self._skipEvents
1060  if expectedEvents < 0:
1061  msg.warning('skipEvents was set higher than the input events in {inData}: {skipEvents} > {neventsInData}. This is not an error, but it is not a normal configuration. Expected events is now 0.'.format(inData=inData, skipEvents=self._skipEvents, neventsInData=neventsInData))
1062  expectedEvents = 0
1063  if self._maxEvents is not None:
1064  if expectedEvents < self._maxEvents:
1065  if self._skipEvents is not None:
1066  msg.warning('maxEvents was set higher than inputEvents-skipEvents for {inData}: {maxEvents} > {neventsInData}-{skipEvents}. This is not an error, but it is not a normal configuration. Expected events remains {expectedEvents}.'.format(inData=inData, maxEvents=self._maxEvents, neventsInData=neventsInData, skipEvents=self._skipEvents, expectedEvents=expectedEvents))
1067  else:
1068  msg.warning('maxEvents was set higher than inputEvents for {inData}: {maxEvents} > {neventsInData}. This is not an error, but it is not a normal configuration. Expected events remains {expectedEvents}.'.format(inData=inData, maxEvents=self._maxEvents, neventsInData=neventsInData, expectedEvents=expectedEvents))
1069  else:
1070  expectedEvents = self._maxEvents
1071  msg.debug('Expected number of processed events for {0} is {1}'.format(inData, expectedEvents))
1072 
1073  # Loop over output data - first find event count configuration
1074  for outData, neventsOutData in self._outEventDict.items():
1075  if not isinstance(neventsOutData, int):
1076  msg.warning('File size metadata for {outData} was not countable, found "{neventsOutData}". No event checks possible for this output data.'.format(outData=outData, neventsOutData=neventsOutData))
1077  continue
1078  if outData in self._eventCountConf[inDataKey]:
1079  checkConf = self._eventCountConf[inDataKey][outData]
1080  outDataKey = outData
1081  else:
1082  # Look for glob matches
1083  checkConf = None
1084  for outDataKey, outDataConf in self._eventCountConf[inDataKey].items():
1085  if fnmatch.fnmatch(outData, outDataKey):
1086  msg.info('Matched output data type {outData} to {outDatakey} by globbing'.format(outData=outData, outDatakey=outDataKey))
1087  outDataKey = outData
1088  checkConf = outDataConf
1089  break
1090  if not checkConf:
1091  msg.warning('No defined event count match for {inData} -> {outData}, so no check possible in this case.'.format(inData=inData, outData=outData))
1092  continue
1093  msg.debug('Event count check for {inData} to {outData} is {checkConf}'.format(inData=inData, outData=outData, checkConf=checkConf))
1094 
1095  # Do the check for this input/output combination
1096  if checkConf == 'match':
1097  # We need an exact match
1098  if neventsOutData == expectedEvents:
1099  msg.info("Event count check for {inData} to {outData} passed: all processed events found ({neventsOutData} output events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData))
1100  else:
1101  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'),
1102  'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1103  elif checkConf == 'filter':
1104  if neventsOutData <= expectedEvents and neventsOutData >= 0:
1105  msg.info("Event count check for {inData} to {outData} passed: found ({neventsOutData} output events selected from {expectedEvents} processed events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1106  else:
1107  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'),
1108  'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected from 0 to {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1109  elif checkConf == 'minEff':
1110  if neventsOutData >= int(expectedEvents * self._evAccEff) and neventsOutData <= expectedEvents:
1111  msg.info("Event count check for {inData} to {outData} passed: found ({neventsOutData} output events selected from {expectedEvents} processed events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1112  else:
1113  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'),
1114  'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected from {minEvents} to {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData,
1115  minEvents=int(expectedEvents * self._evAccEff), expectedEvents=expectedEvents))
1116  elif isinstance(checkConf, (float, int)):
1117  checkConf = float(checkConf)
1118  if checkConf < 0.0 or checkConf > 1.0:
1119  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'),
1120  'Event count check for {inData} to {outData} is misconfigured: the efficiency factor of {eff} is not between 0 and 1.'.format(inData=inData, outData=outData, eff=checkConf))
1121  if neventsOutData >= int(expectedEvents * checkConf) and neventsOutData <= expectedEvents:
1122  msg.info("Event count check for {inData} to {outData} passed: found ({neventsOutData} output events selected from {expectedEvents} processed events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1123  else:
1124  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'),
1125  'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected from {minEvents} to {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData,
1126  minEvents=int(expectedEvents * checkConf), expectedEvents=expectedEvents))
1127  else:
1128  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'),
1129  'Unrecognised event count configuration for {inData} to {outData}: "{conf}" is not known'.format(inData=inData, outData=outData, conf=checkConf))
1130  self._eventCount = expectedEvents
1131  return True
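
# A minimal usage sketch of eventMatch (illustrative only; 'myExecutor' stands for a
# hypothetical, fully configured transform executor whose input/output event counts
# are already known):
#
#   checker = eventMatch(myExecutor)
#   checker.decide()           # raises a TransformValidationException on a mismatch
#   print(checker.eventCount)  # expected number of processed events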