ATLAS Offline Software
trfValidation.py
1 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
2 
3 
14 import fnmatch
15 import os
16 import re
17 
18 from subprocess import Popen, STDOUT, PIPE
19 
20 import logging
21 msg = logging.getLogger(__name__)
22 
23 from PyUtils import RootUtils
24 
25 from PyJobTransforms.trfExeStepTools import getExecutorStepEventCounts
26 from PyJobTransforms.trfExitCodes import trfExit
27 from PyJobTransforms.trfLogger import stdLogLevels
28 from PyJobTransforms.trfArgClasses import argFile
29 
30 import PyJobTransforms.trfExceptions as trfExceptions
31 import PyJobTransforms.trfUtils as trfUtils
32 
33 
34 # @brief Check a Pool file for corruption, return N events or -1 if access problem, -2 if corruption
35 def corruptionTestPool(filename, verbose=False):
36  if not os.access(filename, os.R_OK):
37  msg.info("ERROR can't access file %s", filename)
38  return -1
39 
40  ROOT = RootUtils.import_root()
41 
42  try:
43  f = ROOT.TFile.Open(filename)
44  except Exception:
45  msg.info("Can't open file %s", filename)
46  return -1
47 
48  nEvents = None
49 
50  keys = f.GetListOfKeys()
51  for k in keys:
52  try:
53  tn = k.GetName()
54  t = f.Get(tn)
55  if not isinstance(t, ROOT.TTree): return
56  except Exception:
57  msg.info("Can't get tree %s from file %s", tn, filename)
58  f.Close()
59  return -1
60 
61  if (verbose): msg.info("Working on tree %s", tn)
62  n = t.GetEntriesFast()
63  for i in range(n):
64  s = t.GetEntry(i)
65  if s <= 0:
 66  msg.info("Tree %s: Found corruption in event %i", tn, i)
67  f.Close()
68  return -2
69  else:
70  if verbose and i > 0 and i % 100 == 0:
71  msg.info("Checking event %s", i)
72  msg.info("Tree %s: %i event(s) ok", tn, n)
73 
 74  # Use CollectionTree to determine the number of events
75  if tn == 'CollectionTree':
76  nEvents = n
77  pass # end of loop over trees
78 
79  f.Close()
80  msg.info("ROOT file %s looks ok", filename)
 81  if nEvents is None:
82  msg.info("Failed to determine number of events in file %s. No tree named 'CollectionTree'", filename)
83  return 0
84  return nEvents
85 
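# Usage sketch (illustrative, file name assumed): a non-negative return value is the
# CollectionTree event count; -1 signals an access/open problem, -2 corruption, and 0
# means the file was readable but no 'CollectionTree' was found.
#   nEvents = corruptionTestPool('myESD.pool.root', verbose=True)
#   if nEvents < 0:
#       msg.error('POOL file failed the corruption test (code %d)', nEvents)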
86 # @brief Check BS file for corruption
87 def corruptionTestBS(filename):
88  # First try AtlListBSEvents -c %filename:
89  cmd = ['AtlListBSEvents', '-c', filename]
90  p = Popen(cmd, shell=False, stdout=PIPE, stderr=STDOUT, close_fds=True)
91  while p.poll() is None:
92  line = p.stdout.readline()
93  if line:
94  msg.info("AtlListBSEvents Report: %s", line.strip())
95  rc = p.returncode
96  return rc
97 
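# Usage sketch (illustrative, file name assumed): the return value is simply the return
# code of 'AtlListBSEvents -c', so 0 indicates the bytestream file scanned cleanly.
#   rc = corruptionTestBS('data.RAW')
#   if rc != 0:
#       msg.error('Bytestream corruption test failed with return code %d', rc)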
98 
99 
 100 class ignorePatterns(object):
101 
102 
106  def __init__(self, files=['atlas_error_mask.db'], extraSearch = []):
107  # Setup structured search patterns
 108  self._structuredPatterns = []
109  self._initalisePatterns(files)
110 
111  # Setup extra search patterns
112  self._searchPatterns = []
113  self._initialiseSerches(extraSearch)
114 
115  @property
 116  def structuredPatterns(self):
117  return self._structuredPatterns
118 
119  @property
120  def searchPatterns(self):
121  return self._searchPatterns
122 
123  def _initalisePatterns(self, files):
124  for patternFile in files:
125  if patternFile == "None":
126  continue
127  fullName = trfUtils.findFile(os.environ['DATAPATH'], patternFile)
128  if not fullName:
129  msg.warning('Error pattern file {0} could not be found in DATAPATH'.format(patternFile))
130  continue
131  try:
132  with open(fullName) as patternFileHandle:
133  msg.debug('Opened error file {0} from here: {1}'.format(patternFile, fullName))
134 
135  for line in patternFileHandle:
136  line = line.strip()
137  if line.startswith('#') or line == '':
138  continue
139  try:
140  # N.B. At the moment release matching is not supported!
141  (who, level, message) = [ s.strip() for s in line.split(',', 2) ]
142  if who == "":
143  # Blank means match anything, so make it so...
144  who = "."
145  reWho = re.compile(who)
146  reMessage = re.compile(message)
147  except ValueError:
148  msg.warning('Could not parse this line as a valid error pattern: {0}'.format(line))
149  continue
150  except re.error as e:
151  msg.warning('Could not parse valid regexp from {0}: {1}'.format(message, e))
152  continue
153 
154  msg.debug('Successfully parsed: who={0}, level={1}, message={2}'.format(who, level, message))
155 
156  self._structuredPatterns.append({'service': reWho, 'level': level, 'message': reMessage})
157 
 158  except OSError as e:
 159  (errno, errMsg) = e.args
160  msg.warning('Failed to open error pattern file {0}: {1} ({2})'.format(fullName, errMsg, errno))
161 
162 
163  def _initialiseSerches(self, searchStrings=[]):
164  for string in searchStrings:
165  try:
166  self._searchPatterns.append(re.compile(string))
167  msg.debug('Successfully parsed additional logfile search string: {0}'.format(string))
168  except re.error as e:
169  msg.warning('Could not parse valid regexp from {0}: {1}'.format(string, e))
170 
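# Illustrative sketch of how the ignore patterns are used (the mask entry shown below is
# an assumption, not a real atlas_error_mask.db line): each non-comment line of a pattern
# file holds "<service regexp>, <level>, <message regexp>", and extra free-form regexps
# can be supplied via extraSearch.
#   ignores = ignorePatterns(files=['atlas_error_mask.db'],
#                            extraSearch=[r'known harmless message \d+'])
#   # e.g. a mask line could look like:  MyAlg, ERROR, .*known transient failure.*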
171 
172 
173 
 176 class logFileReport(object):
177  def __init__(self, logfile=None, msgLimit=10, msgDetailLevel=stdLogLevels['ERROR']):
178 
179  # We can have one logfile or a set
180  if isinstance(logfile, str):
181  self._logfile = [logfile, ]
182  else:
183  self._logfile = logfile
184 
185  self._msgLimit = msgLimit
186  self._msgDetails = msgDetailLevel
187  self._re = None
188 
189  if logfile:
190  self.scanLogFile(logfile)
191 
192  def resetReport(self):
193  pass
194 
195  def scanLogFile(self):
196  pass
197 
198  def worstError(self):
199  pass
200 
201  def firstError(self):
202  pass
203 
204  def __str__(self):
205  return ''
206 
207 
208 
 211 class athenaLogFileReport(logFileReport):
212 
216  def __init__(self, logfile, substepName=None, msgLimit=10, msgDetailLevel=stdLogLevels['ERROR'], ignoreList=None):
217  if ignoreList:
218  self._ignoreList = ignoreList
219  else:
220  self._ignoreList = ignorePatterns()
221 
222 
227  self._regExp = re.compile(r'(?P<service>[^\s]+\w)(.*)\s+(?P<level>' + '|'.join(stdLogLevels) + r')\s+(?P<message>.*)')
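 # Illustrative example of a line this pattern is meant to match (not taken from a real log):
 #   "AthenaEventLoopMgr   INFO   ===>>>  done processing event #1, run #222222"
 # which yields service='AthenaEventLoopMgr', level='INFO' and the remainder as message.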
228 
229  self._metaPat = re.compile(r"MetaData:\s+(.*?)\s*=\s*(.*)$")
230  self._metaData = {}
231  self._substepName = substepName
232  self._msgLimit = msgLimit
233 
234  self.resetReport()
235 
236  super(athenaLogFileReport, self).__init__(logfile, msgLimit, msgDetailLevel)
237 
238 
240  @property
241  def python(self):
242  errorDict = {'countSummary': {}, 'details': {}}
243  for level, count in self._levelCounter.items():
244  errorDict['countSummary'][level] = count
245  if self._levelCounter[level] > 0 and len(self._errorDetails[level]) > 0:
246  errorDict['details'][level] = []
247  for error in self._errorDetails[level]:
248  errorDict['details'][level].append(error)
249  return errorDict
250 
251  def resetReport(self):
252  self._levelCounter = {}
253  for level in list(stdLogLevels) + ['UNKNOWN', 'IGNORED']:
254  self._levelCounter[level] = 0
255 
256  self._errorDetails = {}
257  for level in self._levelCounter:
258  self._errorDetails[level] = []
259  # Format:
260  # List of dicts {'message': errMsg, 'firstLine': lineNo, 'count': N}
261  self._dbbytes = 0
262  self._dbtime = 0.0
263 
264 
266  def knowledgeFileHandler(self, knowledgefile):
267  # load abnormal/error line(s) from the knowledge file(s)
268  linesList = []
269  fullName = trfUtils.findFile(os.environ['DATAPATH'], knowledgefile)
270  if not fullName:
271  msg.warning('Knowledge file {0} could not be found in DATAPATH'.format(knowledgefile))
272  else:
273  try:
274  with open(fullName) as knowledgeFileHandle:
275  msg.debug('Opened knowledge file {0} from here: {1}'.format(knowledgefile, fullName))
276 
277  for line in knowledgeFileHandle:
278  if line.startswith('#') or line == '' or line =='\n':
279  continue
280  line = line.rstrip('\n')
281  linesList.append(line)
282  except OSError as e:
283  msg.warning('Failed to open knowledge file {0}: {1}'.format(fullName, e))
284  return linesList
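 # (Descriptive note) Knowledge files such as nonStandardErrors.db are plain text:
 # one abnormal-line pattern per line, with '#' comments and blank lines skipped.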
285 
286  def scanLogFile(self, resetReport=False):
287  nonStandardErrorsList = self.knowledgeFileHandler('nonStandardErrors.db')
288 
289  if resetReport:
290  self.resetReport()
291 
292  for log in self._logfile:
293  msg.debug('Now scanning logfile {0}'.format(log))
294  seenNonStandardError = ''
295  # N.B. Use the generator so that lines can be grabbed by subroutines, e.g., core dump svc reporter
296  try:
297  myGen = trfUtils.lineByLine(log, substepName=self._substepName)
298  except IOError as e:
 299  msg.error('Failed to open transform logfile {0}: {1}'.format(log, e))
300  # Return this as a small report
301  self._levelCounter['ERROR'] = 1
302  self._errorDetails['ERROR'] = {'message': str(e), 'firstLine': 0, 'count': 1}
303  return
304  for line, lineCounter in myGen:
305  m = self._metaPat.search(line)
306  if m is not None:
307  key, value = m.groups()
308  self._metaData[key] = value
309 
310  m = self._regExp.match(line)
311  if m is None:
312  # We didn't manage to get a recognised standard line from the file
313  # But we can check for certain other interesting things, like core dumps
314  if 'Core dump from CoreDumpSvc' in line:
315  msg.warning('Detected CoreDumpSvc report - activating core dump svc grabber')
316  self.coreDumpSvcParser(log, myGen, line, lineCounter)
317  continue
 318  # Add the G4 exception parsers
319  if 'G4Exception-START' in line:
320  msg.warning('Detected G4 exception report - activating G4 exception grabber')
321  self.g4ExceptionParser(myGen, line, lineCounter, 40)
322  continue
323  if '*** G4Exception' in line:
324  msg.warning('Detected G4 9.4 exception report - activating G4 exception grabber')
325  self.g494ExceptionParser(myGen, line, lineCounter)
326  continue
327  # Add the python exception parser
328  if 'Shortened traceback (most recent user call last)' in line:
329  msg.warning('Detected python exception - activating python exception grabber')
330  self.pythonExceptionParser(log, myGen, line, lineCounter)
331  continue
332  # Add parser for missed bad_alloc
333  if 'terminate called after throwing an instance of \'std::bad_alloc\'' in line:
334  msg.warning('Detected bad_alloc!')
335  self.badAllocExceptionParser(myGen, line, lineCounter)
336  continue
337  # Parser for ROOT reporting a stale file handle (see ATLASG-448)
338  # Amendment: Generalize the search (see ATLASRECTS-7121)
339  if 'Error in <TFile::ReadBuffer>' in line:
340  self.rootSysErrorParser(myGen, line, lineCounter)
341  continue
342 
343  if 'Error in <TFile::WriteBuffer>' in line:
344  self.rootSysErrorParser(myGen, line, lineCounter)
345  continue
346  # Check if the line is among the non-standard logging errors from the knowledge file
347  if any(line in l for l in nonStandardErrorsList):
348  seenNonStandardError = line
349  continue
350 
351  msg.debug('Non-standard line in %s: %s', log, line)
352  self._levelCounter['UNKNOWN'] += 1
353  continue
354 
355  # Line was matched successfully
356  fields = {}
357  for matchKey in ('service', 'level', 'message'):
358  fields[matchKey] = m.group(matchKey)
359  msg.debug('Line parsed as: {0}'.format(fields))
360 
361  # Check this is not in our ignore list
362  ignoreFlag = False
363  for ignorePat in self._ignoreList.structuredPatterns:
364  serviceMatch = ignorePat['service'].match(fields['service'])
365  levelMatch = (ignorePat['level'] == "" or ignorePat['level'] == fields['level'])
366  messageMatch = ignorePat['message'].match(fields['message'])
367  if serviceMatch and levelMatch and messageMatch:
368  msg.info('Error message "{0}" was ignored at line {1} (structured match)'.format(line, lineCounter))
369  ignoreFlag = True
370  break
371  if ignoreFlag is False:
372  for searchPat in self._ignoreList.searchPatterns:
373  if searchPat.search(line):
374  msg.info('Error message "{0}" was ignored at line {1} (search match)'.format(line, lineCounter))
375  ignoreFlag = True
376  break
377  if ignoreFlag:
 378  # Got an ignore - reclassify this message to the special IGNORED level
379  fields['level'] = 'IGNORED'
380  else:
381  # Some special handling for specific errors (maybe generalise this if
382  # there end up being too many special cases)
383  # Upgrade bad_alloc to CATASTROPHE to allow for better automated handling of
384  # jobs that run out of memory
385  if 'std::bad_alloc' in fields['message']:
386  fields['level'] = 'CATASTROPHE'
387 
388  # concatenate the seen non-standard logging error to the FATAL
389  if fields['level'] == 'FATAL':
390  if seenNonStandardError:
391  line += '; ' + seenNonStandardError
392 
393  # Count this error
394  self._levelCounter[fields['level']] += 1
395 
396  # Record some error details
397  # N.B. We record 'IGNORED' errors as these really should be flagged for fixing
398  if fields['level'] == 'IGNORED' or stdLogLevels[fields['level']] >= self._msgDetails:
399  if self._levelCounter[fields['level']] <= self._msgLimit:
400  detailsHandled = False
401  for seenError in self._errorDetails[fields['level']]:
402  if seenError['message'] == line:
403  seenError['count'] += 1
404  detailsHandled = True
405  break
406  if detailsHandled is False:
407  self._errorDetails[fields['level']].append({'message': line, 'firstLine': lineCounter, 'count': 1})
408  elif self._levelCounter[fields['level']] == self._msgLimit + 1:
 409  msg.warning("Found message number {0} at level {1} - this and further messages will be suppressed from the report".format(self._levelCounter[fields['level']], fields['level']))
410  else:
411  # Overcounted
412  pass
413  if 'Total payload read from COOL' in fields['message']:
414  msg.debug("Found COOL payload information at line {0}".format(line))
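 # The COOL payload message is expected to look roughly like (illustrative):
 #   "Total payload read from COOL: 12345 bytes in ((  0.05 ))s"
 # so the regexp below picks out the byte count and the time in seconds.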
415  a = re.match(r'(\D+)(?P<bytes>\d+)(\D+)(?P<time>\d+[.]?\d*)(\D+)', fields['message'])
416  self._dbbytes += int(a.group('bytes'))
417  self._dbtime += float(a.group('time'))
418 
419 
420 
421  def dbMonitor(self):
422  return {'bytes' : self._dbbytes, 'time' : self._dbtime} if self._dbbytes > 0 or self._dbtime > 0 else None
423 
424 
425  def worstError(self):
426  worst = stdLogLevels['DEBUG']
427  worstName = 'DEBUG'
428  for lvl, count in self._levelCounter.items():
429  if count > 0 and stdLogLevels.get(lvl, 0) > worst:
430  worstName = lvl
431  worst = stdLogLevels[lvl]
432  if len(self._errorDetails[worstName]) > 0:
433  firstError = self._errorDetails[worstName][0]
434  else:
435  firstError = None
436 
437  return {'level': worstName, 'nLevel': worst, 'firstError': firstError}
438 
439 
440 
441  def firstError(self, floor='ERROR'):
442  firstLine = firstError = None
443  firstLevel = stdLogLevels[floor]
444  firstName = floor
445  for lvl, count in self._levelCounter.items():
446  if (count > 0 and stdLogLevels.get(lvl, 0) >= stdLogLevels[floor] and
447  (firstError is None or self._errorDetails[lvl][0]['firstLine'] < firstLine)):
448  firstLine = self._errorDetails[lvl][0]['firstLine']
449  firstLevel = stdLogLevels[lvl]
450  firstName = lvl
451  firstError = self._errorDetails[lvl][0]
452 
453  return {'level': firstName, 'nLevel': firstLevel, 'firstError': firstError}
454 
455 
456  def moreDetails(self, log, firstline, firstLineCount, knowledgeFile, offset=0):
457  # Look for "abnormal" and "last normal" line(s)
458  # Make a list of last e.g. 50 lines before core dump
459  abnormalLinesList = self.knowledgeFileHandler(knowledgeFile)
460  linesToBeScanned = 50
461  seenAbnormalLines = []
462  abnormalLinesReport = {}
463  lastNormalLineReport = {}
464 
465  linesList = []
466  myGen = trfUtils.lineByLine(log)
467  for line, linecounter in myGen:
468  if linecounter in range(firstLineCount - linesToBeScanned, firstLineCount-offset):
469  linesList.append([linecounter, line])
470  elif linecounter == firstLineCount:
471  break
472 
473  for linecounter, line in reversed(linesList):
474  if re.findall(r'|'.join(abnormalLinesList), line):
475  seenLine = False
476  for dic in seenAbnormalLines:
477  # count repetitions or similar (e.g. first 15 char) abnormal lines
478  if dic['message'] == line or dic['message'][0:15] == line[0:15]:
479  dic['count'] += 1
480  seenLine = True
481  break
482  if seenLine is False:
483  seenAbnormalLines.append({'message': line, 'firstLine': linecounter, 'count': 1})
484  else:
485  if line != '':
486  lastNormalLineReport = {'message': line, 'firstLine': linecounter, 'count': 1}
487  break
488  else:
489  continue
490 
491  # Write the list of abnormal lines into the abnormalLinesReport dictionary
492  # The keys of each abnormal line have a number suffix starting with 0
 493  # e.g., the first abnormal line's keys are: {'message0', 'firstLine0', 'count0'}
494 
495  for a in range(len(seenAbnormalLines)):
496  abnormalLinesReport.update({'message{0}'.format(a): seenAbnormalLines[a]['message'], 'firstLine{0}'.format(a): seenAbnormalLines[a]['firstLine'],
497  'count{0}'.format(a): seenAbnormalLines[a]['count']})
498 
499  return {'abnormalLines': abnormalLinesReport, 'lastNormalLine': lastNormalLineReport}
500 
501 
502 
508  def coreDumpSvcParser(self, log, lineGenerator, firstline, firstLineCount):
509  _eventCounter = _run = _event = _currentAlgorithm = _functionLine = _currentFunction = None
510  coreDumpReport = 'Core dump from CoreDumpSvc'
511  # Number of lines to ignore above 'core dump' when looking for abnormal lines
512  offset = 1
513  coreDumpDetailsReport = {}
514 
515  for line, linecounter in lineGenerator:
516  m = self._regExp.match(line)
517  if m is None:
518  if 'Caught signal 11(Segmentation fault)' in line:
519  coreDumpReport = 'Segmentation fault'
520  if 'Event counter' in line:
521  _eventCounter = line
522 
523  #Lookup: 'EventID: [Run,Evt,Lumi,Time,BunchCross,DetMask] = [267599,7146597,1,1434123751:0,0,0x0,0x0,0x0]'
524  if 'EventID' in line:
 525  match = re.findall(r'\[.*?\]', line)
 526  if match and len(match) >= 2: # Assuming the line contains at least one key-value pair.
 527  brackets = "[]"
 528  commaDelimiter = ','
 529  keys = (match[0].strip(brackets)).split(commaDelimiter)
 530  values = (match[1].strip(brackets)).split(commaDelimiter)
531 
532  if 'Run' in keys:
533  _run = 'Run: ' + values[keys.index('Run')]
534 
535  if 'Evt' in keys:
536  _event = 'Evt: ' + values[keys.index('Evt')]
537 
538  if 'Current algorithm' in line:
539  _currentAlgorithm = line
540  if '<signal handler called>' in line:
541  _functionLine = linecounter+1
 542  if _functionLine and linecounter == _functionLine:
543  if ' in ' in line:
544  _currentFunction = 'Current Function: ' + line.split(' in ')[1].split()[0]
545  else:
546  _currentFunction = 'Current Function: ' + line.split()[1]
547  else:
 548  # Ideally we would push this line back into the generator to be reparsed in
 549  # the normal way (this would require making the generator a class with an
 550  # internal FIFO stack, so that a pushback method could be supported),
 551  # e.g.:
552  # lineGenerator.pushback(line)
553  break
554  _eventCounter = 'Event counter: unknown' if not _eventCounter else _eventCounter
555  _run = 'Run: unknown' if not _run else _run
556  _event = 'Evt: unknown' if not _event else _event
557  _currentAlgorithm = 'Current algorithm: unknown' if not _currentAlgorithm else _currentAlgorithm
558  _currentFunction = 'Current Function: unknown' if not _currentFunction else _currentFunction
559  coreDumpReport = '{0}: {1}; {2}; {3}; {4}; {5}'.format(coreDumpReport, _eventCounter, _run, _event, _currentAlgorithm, _currentFunction)
560 
561  coreDumpDetailsReport = self.moreDetails(log, firstline, firstLineCount, 'knowledgeFile.db', offset)
562  abnormalLines = coreDumpDetailsReport['abnormalLines']
563 
564  # concatenate an extract of first seen abnormal line to the core dump message
565  if 'message0' in abnormalLines.keys():
566  coreDumpReport += '; Abnormal line seen just before core dump: ' + abnormalLines['message0'][0:30] + '...[truncated] ' + '(see the jobReport)'
567 
568  # Core dumps are always fatal...
569  msg.debug('Identified core dump - adding to error detail report')
570  self._levelCounter['FATAL'] += 1
571  self._errorDetails['FATAL'].append({'moreDetails': coreDumpDetailsReport, 'message': coreDumpReport, 'firstLine': firstLineCount, 'count': 1})
572 
573 
574  def g494ExceptionParser(self, lineGenerator, firstline, firstLineCount):
575  g4Report = firstline
576  g4lines = 1
577  if 'Aborting execution' not in g4Report:
578  for line, linecounter in lineGenerator:
579  g4Report += os.linesep + line
580  g4lines += 1
581  # Test for the closing string
582  if '*** ' in line:
583  break
584  if g4lines >= 25:
585  msg.warning('G4 exception closing string not found within {0} log lines of line {1}'.format(g4lines, firstLineCount))
586  break
587 
588  # G4 exceptions can be fatal or they can be warnings...
589  msg.debug('Identified G4 exception - adding to error detail report')
590  if "just a warning" in g4Report:
591  if self._levelCounter['WARNING'] <= self._msgLimit:
592  self._levelCounter['WARNING'] += 1
593  self._errorDetails['WARNING'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1})
594  elif self._levelCounter['WARNING'] == self._msgLimit + 1:
 595  msg.warning("Found message number {0} at level WARNING - this and further messages will be suppressed from the report".format(self._levelCounter['WARNING']))
596  else:
597  self._levelCounter['FATAL'] += 1
598  self._errorDetails['FATAL'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1})
599 
600  def g4ExceptionParser(self, lineGenerator, firstline, firstLineCount, g4ExceptionLineDepth):
601  g4Report = firstline
602  g4lines = 1
603  for line, linecounter in lineGenerator:
604  g4Report += os.linesep + line
605  g4lines += 1
606  # Test for the closing string
607  if 'G4Exception-END' in line:
608  break
609  if g4lines >= g4ExceptionLineDepth:
610  msg.warning('G4 exception closing string not found within {0} log lines of line {1}'.format(g4lines, firstLineCount))
611  break
612 
613  # G4 exceptions can be fatal or they can be warnings...
614  msg.debug('Identified G4 exception - adding to error detail report')
615  if "-------- WWWW -------" in g4Report:
616  if self._levelCounter['WARNING'] <= self._msgLimit:
617  self._levelCounter['WARNING'] += 1
618  self._errorDetails['WARNING'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1})
619  elif self._levelCounter['WARNING'] == self._msgLimit + 1:
 620  msg.warning("Found message number {0} at level WARNING - this and further messages will be suppressed from the report".format(self._levelCounter['WARNING']))
621  else:
622  self._levelCounter['FATAL'] += 1
623  self._errorDetails['FATAL'].append({'message': g4Report, 'firstLine': firstLineCount, 'count': 1})
624 
625 
626  def pythonExceptionParser(self, log, lineGenerator, firstline, firstLineCount):
627  pythonExceptionReport = ""
628  lastLine = firstline
629  lastLine2 = firstline
630  pythonErrorLine = firstLineCount
631  pyLines = 1
632  for line, linecounter in lineGenerator:
633  if 'Py:Athena' in line and 'INFO leaving with code' in line:
634  if len(lastLine)> 0:
635  pythonExceptionReport = lastLine
636  pythonErrorLine = linecounter-1
637  else: # Sometimes there is a blank line after the exception
638  pythonExceptionReport = lastLine2
639  pythonErrorLine = linecounter-2
640  break
641  if pyLines >= 25:
642  msg.warning('Could not identify python exception correctly scanning {0} log lines after line {1}'.format(pyLines, firstLineCount))
643  pythonExceptionReport = "Unable to identify specific exception"
644  pythonErrorLine = firstLineCount
645  break
646  lastLine2 = lastLine
647  lastLine = line
648  pyLines += 1
649 
650  pythonExceptionDetailsReport = self.moreDetails(log, firstline, firstLineCount, 'knowledgeFile.db')
651  abnormalLines = pythonExceptionDetailsReport['abnormalLines']
652 
653  # concatenate an extract of first seen abnormal line to pythonExceptionReport
654  if 'message0' in abnormalLines.keys():
655  pythonExceptionReport += '; Abnormal line seen just before python exception: ' + abnormalLines['message0'][0:30] + '...[truncated] ' + '(see the jobReport)'
656 
657  msg.debug('Identified python exception - adding to error detail report')
658  self._levelCounter['FATAL'] += 1
659  self._errorDetails['FATAL'].append({'moreDetails': pythonExceptionDetailsReport, 'message': pythonExceptionReport, 'firstLine': pythonErrorLine, 'count': 1})
660 
661 
662  def badAllocExceptionParser(self, lineGenerator, firstline, firstLineCount):
663  badAllocExceptionReport = 'terminate after \'std::bad_alloc\'.'
664 
665  msg.debug('Identified bad_alloc - adding to error detail report')
666  self._levelCounter['CATASTROPHE'] += 1
667  self._errorDetails['CATASTROPHE'].append({'message': badAllocExceptionReport, 'firstLine': firstLineCount, 'count': 1})
668 
669  def rootSysErrorParser(self, lineGenerator, firstline, firstLineCount):
670  msg.debug('Identified ROOT IO problem - adding to error detail report')
671  self._levelCounter['FATAL'] += 1
672  self._errorDetails['FATAL'].append({'message': firstline, 'firstLine': firstLineCount, 'count': 1})
673 
674  def __str__(self):
675  return str(self._levelCounter) + str(self._errorDetails)
676 
677 
 678 class scriptLogFileReport(logFileReport):
679  def __init__(self, logfile=None, msgLimit=200, msgDetailLevel=stdLogLevels['ERROR']):
680  self._levelCounter = {}
681  self._errorDetails = {}
682  self.resetReport()
683  super(scriptLogFileReport, self).__init__(logfile, msgLimit, msgDetailLevel)
684 
685  def resetReport(self):
686  self._levelCounter.clear()
687  for level in list(stdLogLevels) + ['UNKNOWN', 'IGNORED']:
688  self._levelCounter[level] = 0
689 
690  self._errorDetails.clear()
691  for level in self._levelCounter: # List of dicts {'message': errMsg, 'firstLine': lineNo, 'count': N}
692  self._errorDetails[level] = []
693 
694  def scanLogFile(self, resetReport=False):
695  if resetReport:
696  self.resetReport()
697 
698  for log in self._logfile:
699  msg.info('Scanning logfile {0}'.format(log))
700  try:
701  myGen = trfUtils.lineByLine(log)
702  except IOError as e:
 703  msg.error('Failed to open transform logfile {0}: {1}'.format(log, e))
704  # Return this as a small report
705  self._levelCounter['ERROR'] = 1
706  self._errorDetails['ERROR'] = {'message': str(e), 'firstLine': 0, 'count': 1}
707  return
708 
709  for line, lineCounter in myGen:
710  # TODO: This implementation currently only scans for Root SysErrors.
 711  # A general solution would be to have a common error parser for all system-level
 712  # errors, which are also handled by athenaLogFileReport.
 713  if 'Error in <TFile::ReadBuffer>' in line or \
 714  'Error in <TFile::WriteBuffer>' in line:
715  self.rootSysErrorParser(line, lineCounter)
716 
717  # Return the worst error found in the logfile (first error of the most serious type)
718  def worstError(self):
719  worstlevelName = 'DEBUG'
720  worstLevel = stdLogLevels[worstlevelName]
721  for levelName, count in self._levelCounter.items():
722  if count > 0 and stdLogLevels.get(levelName, 0) > worstLevel:
723  worstlevelName = levelName
724  worstLevel = stdLogLevels[levelName]
725 
726  if len(self._errorDetails[worstlevelName]) > 0:
727  firstError = self._errorDetails[worstlevelName][0]
728  else:
729  firstError = None
730 
731  return {'level': worstlevelName, 'nLevel': worstLevel, 'firstError': firstError}
732 
733  def __str__(self):
734  return str(self._levelCounter) + str(self._errorDetails)
735 
736  def rootSysErrorParser(self, line, lineCounter):
737  msg.debug('Identified ROOT IO problem - adding to error detail report')
738  self._levelCounter['FATAL'] += 1
739  self._errorDetails['FATAL'].append({'message': line, 'firstLine': lineCounter, 'count': 1})
740 
741 
744 def returnIntegrityOfFile(file, functionName):
745  try:
746  import PyJobTransforms.trfFileValidationFunctions as trfFileValidationFunctions
747  except Exception as exception:
748  msg.error('Failed to import module PyJobTransforms.trfFileValidationFunctions with error {error}'.format(error = exception))
749  raise
750  validationFunction = getattr(trfFileValidationFunctions, functionName)
751  return validationFunction(file)
752 
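# Usage sketch (illustrative): functionName is looked up by name in
# PyJobTransforms.trfFileValidationFunctions; the file name and function name below are
# assumptions for illustration only.
#   result = returnIntegrityOfFile('myAOD.pool.root', 'returnIntegrityOfPoolFile')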
753 
754 
757 def performStandardFileValidation(dictionary, io, parallelMode = False, multithreadedMode=False):
758  if parallelMode is False:
759  msg.info('Starting legacy (serial) file validation')
760  for (key, arg) in dictionary.items():
761  if not isinstance(arg, argFile):
762  continue
763  if not arg.io == io:
764  continue
765  if arg.auxiliaryFile:
766  continue
767 
768  msg.info('Validating data type %s...', key)
769 
770  for fname in arg.value:
771  msg.info('Validating file %s...', fname)
772 
773  if io == "output":
774  msg.info('{0}: Testing corruption...'.format(fname))
775  if multithreadedMode:
776  os.environ['TRF_MULTITHREADED_VALIDATION']='TRUE'
777  if arg.getSingleMetadata(fname, 'integrity') is True:
778  msg.info('Corruption test passed.')
779  elif arg.getSingleMetadata(fname, 'integrity') is False:
780  msg.error('Corruption test failed.')
781  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s did not pass corruption test' % fname)
782  elif arg.getSingleMetadata(fname, 'integrity') == 'UNDEFINED':
783  msg.info('No corruption test defined.')
784  elif arg.getSingleMetadata(fname, 'integrity') is None:
785  msg.error('Could not check for file integrity')
786  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s might be missing' % fname)
787  else:
788  msg.error('Unknown rc from corruption test.')
789  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s did not pass corruption test' % fname)
790 
791 
792  msg.info('{0}: Testing event count...'.format(fname))
793  if arg.getSingleMetadata(fname, 'nentries') is not None:
794  msg.info('Event counting test passed ({0!s} events).'.format(arg.getSingleMetadata(fname, 'nentries')))
795  else:
796  msg.error('Event counting test failed.')
 797  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'File %s did not pass event counting test' % fname)
798 
799 
800  msg.info('{0}: Checking if guid exists...'.format(fname))
801  if arg.getSingleMetadata(fname, 'file_guid') is None:
802  msg.error('Guid could not be determined.')
 803  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'Guid of file %s could not be determined' % fname)
804  elif arg.getSingleMetadata(fname, 'file_guid') == 'UNDEFINED':
805  msg.info('Guid not defined.')
806  else:
807  msg.info('Guid is %s', arg.getSingleMetadata(fname, 'file_guid'))
808  msg.info('Stopping legacy (serial) file validation')
809  if parallelMode is True:
810  msg.info('Starting parallel file validation')
811  # Create lists of files and args. These lists are to be used with zip in
812  # order to check and update file integrity metadata as appropriate.
813  fileList = []
814  argList = []
815  # Create a list of the integrity functions for files.
816  integrityFunctionList = []
817  # Create a list for collation of file validation jobs for submission to
818  # the parallel job processor.
819  jobs = []
820  for (key, arg) in dictionary.items():
821  if not isinstance(arg, argFile):
822  continue
823  if not arg.io == io:
824  continue
825  msg.debug('Collating list of files for validation')
826  for fname in arg.value:
827  msg.debug('Appending file {fileName} to list of files for validation'.format(fileName = str(fname)))
828  # Append the current file to the file list.
829  fileList.append(fname)
830  # Append the current arg to the arg list.
831  argList.append(arg)
832  # Append the current integrity function name to the integrity
833  # function list if it exists. If it does not exist, raise an
834  # exception.
835  if arg.integrityFunction:
836  integrityFunctionList.append(arg.integrityFunction)
837  else:
838  msg.error('Validation function for file {fileName} not available for parallel file validation'.format(fileName = str(fname)))
839  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_FAIL'), 'Validation function for file %s not available for parallel file validation' % str(fname))
840  # Compose a job for validation of the current file using the
841  # appropriate validation function, which is derived from the
842  # associated data attribute arg.integrityFunction.
843  jobs.append(
844  trfUtils.Job(
845  name = "validation of file {fileName}".format(
846  fileName = str(fname)),
847  workFunction = returnIntegrityOfFile,
848  workFunctionKeywordArguments = {
849  'file': fname,
850  'functionName': arg.integrityFunction
851  },
852  workFunctionTimeout = 600
853  )
854  )
855  # Contain the file validation jobs in a job group for submission to the
856  # parallel job processor.
857  jobGroup1 = trfUtils.JobGroup(
858  name = "standard file validation",
859  jobs = jobs
860  )
861  # Prepare the parallel job processor.
862  parallelJobProcessor1 = trfUtils.ParallelJobProcessor()
863  # Submit the file validation jobs to the parallel job processor.
864  msg.info('Submitting file validation jobs to parallel job processor')
865  parallelJobProcessor1.submit(jobSubmission = jobGroup1)
866  resultsList = parallelJobProcessor1.getResults()
867  msg.info('Parallel file validation complete')
868  # Update file metadata with integrity results using the lists fileList,
869  # argList and resultsList.
870  msg.info('Processing file integrity results')
871  for currentFile, currentArg, currentIntegrityFunction, currentResult in zip(fileList, argList, integrityFunctionList, resultsList):
872  msg.info('{IO} file {fileName} has integrity status {integrityStatus} as determined by integrity function {integrityFunction}'.format(
873  IO = str(io),
874  fileName = str(currentFile),
875  integrityStatus = str(currentResult),
876  integrityFunction = str(currentIntegrityFunction)
877  ))
878  # If the first (Boolean) element of the result tuple for the current
879  # file is True, update the integrity metadata. If it is False, raise
880  # an exception.
881  if currentResult[0] is True:
882  msg.info('Updating integrity metadata for file {fileName}'.format(fileName = str(currentFile)))
883  currentArg._setMetadata(files=[currentFile,], metadataKeys={'integrity': currentResult[0]})
884  else:
885  exceptionMessage = "{IO} file validation failure on file {fileName} with integrity status {integrityStatus} as determined by integrity function {integrityFunction}".format(
886  IO = str(io),
887  fileName = str(currentFile),
888  integrityStatus = str(currentResult),
889  integrityFunction = str(currentIntegrityFunction)
890  )
891  msg.error("exception message: {exceptionMessage}".format(
892  exceptionMessage = exceptionMessage
893  ))
894  if io == 'input':
895  exitCodeName = 'TRF_INPUT_FILE_VALIDATION_FAIL'
896  elif io == 'output':
897  exitCodeName = 'TRF_OUTPUT_FILE_VALIDATION_FAIL'
 898  raise trfExceptions.TransformValidationException(
899  trfExit.nameToCode(exitCodeName),
900  exceptionMessage
901  )
902  # Perform a check to determine if the file integrity metadata is
903  # correct.
904  if currentArg.getSingleMetadata(currentFile, metadataKey = 'integrity', populate = False) == currentResult[0]:
905  msg.debug("file integrity metadata update successful")
906  else:
907  msg.error("file integrity metadata update unsuccessful")
908  msg.info('Stopping parallel file validation')
909 
910 
911 
 912 class eventMatch(object):
913 
914 
921  def __init__(self, executor, eventCountConf=None, eventCountConfOverwrite=False):
922  self._executor = executor
923  self._eventCount = None
924 
925 
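 # Descriptive note on the table below: each entry maps an input data type to the
 # expected relation with an output data type - "match" requires identical event
 # counts, "filter" allows anything from 0 to the number of processed events,
 # "minEff" applies the global eventAcceptanceEfficiency, and a float (such as
 # simEventEff) is used directly as the minimum acceptable efficiency.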
936  simEventEff = 0.995
937  self._eventCountConf = {}
938  self._eventCountConf['EVNT'] = {'EVNT_MRG':"match", "HITS": simEventEff, "EVNT_TR": "filter", "DAOD_TRUTH*" : "match"}
939  self._eventCountConf['EVNT_TR'] = {'HITS': simEventEff}
940  self._eventCountConf['HITS'] = {'RDO':"match", 'HITS_RSM': simEventEff, "HITS_MRG":"match", 'HITS_FILT': simEventEff, "RDO_FILT": "filter", "DAOD_TRUTH*" : "match", "HIST_SIM" : "match"}
941  self._eventCountConf['BS'] = {'ESD': "match", 'DRAW_*':"filter", 'NTUP_*':"filter", "BS_MRG":"match", 'DESD*': "filter", 'AOD':"match", 'DAOD*':"filter", "DAOD_PHYS":"match", "DAOD_PHYSLITE":"match"}
942  self._eventCountConf['RDO*'] = {'ESD': "match", 'DRAW_*':"filter", 'NTUP_*':"filter", "RDO_MRG":"match", "RDO_TRIG":"match", 'AOD':"match", 'DAOD*':"filter", "DAOD_PHYS":"match", "DAOD_PHYSLITE":"match", "HIST_DIGI":"match"}
943  self._eventCountConf['ESD'] = {'ESD_MRG': "match", 'AOD':"match", 'DESD*':"filter", 'DAOD_*':"filter", 'NTUP_*':"filter", "DAOD_PHYS":"match", "DAOD_PHYSLITE":"match"}
 944  self._eventCountConf['AOD'] = {'AOD_MRG' : "match", 'TAG':"match", "NTUP_*":"filter", "DAOD_*":"filter", "DAOD_PHYS":"match", "DAOD_PHYSLITE":"match"}
945  self._eventCountConf['AOD_MRG'] = {'TAG':"match"}
946  self._eventCountConf['DAOD_*'] = {'DAOD_*_MRG' : "match"}
947  self._eventCountConf['TAG'] = {'TAG_MRG': "match"}
948  self._eventCountConf['HIST'] = {'HIST_MRG': "match"}
949  self._eventCountConf['NTUP_COMMON'] = {'DNTUP*': "filter"}
950  self._eventCountConf['NTUP_*'] = {'NTUP_*_MRG': "match"}
951  # Next one comprises special data type names for smart merging of AthenaMP worker outputs
952  self._eventCountConf['POOL_MRG_INPUT'] = {'POOL_MRG_OUTPUT': "match"}
953 
954 
955  if eventCountConf:
956  if eventCountConfOverwrite is True:
957  self._eventCountConf = eventCountConf
958  else:
959  self._eventCountConf.update(eventCountConf)
960 
961  msg.debug('Event count check configuration is: {0}'.format(self._eventCountConf))
962  if hasattr(self._executor, 'name'):
963  msg.debug('Event count check ready for executor {0}'.format(self._executor.name))
964 
965  if self._executor is not None:
966  self.configureCheck(override=False)
967 
968  @property
969  def eventCount(self):
970  return self._eventCount
971 
972 
976  def configureCheck(self, override=False):
977  if override:
978  msg.info('Overriding check configuration with: {0}'.format(override))
979  self._inEventDict = override['inEventDict']
980  self._outEventDict = override['outEventDict']
981  self._skipEvents = override['skipEvents']
982  self._maxEvents = override['maxEvents']
983  self._evAccEff = override['evAccEff']
984  else:
985  # Input data from executor
986  self._inEventDict = {}
987  for dataTypeName in self._executor.input:
988  try:
989  self._inEventDict[dataTypeName] = self._executor.conf.dataDictionary[dataTypeName].nentries
990  msg.debug('Input data type {0} has {1} events'.format(dataTypeName, self._inEventDict[dataTypeName]))
991  except KeyError:
992  msg.warning('Found no dataDictionary entry for input data type {0}'.format(dataTypeName))
993 
994  # Output data from executor
995  self._outEventDict = {}
996  for dataTypeName in self._executor.output:
997  try:
998  self._outEventDict[dataTypeName] = self._executor.conf.dataDictionary[dataTypeName].nentries
999  msg.debug('Output data type {0} has {1} events'.format(dataTypeName, self._outEventDict[dataTypeName]))
1000  except KeyError:
1001  msg.warning('Found no dataDictionary entry for output data type {0}'.format(dataTypeName))
1002 
1003  # Find if we have a skipEvents applied
1004  if "skipEvents" in self._executor.conf.argdict:
1005  self._skipEvents = self._executor.conf.argdict['skipEvents'].returnMyValue(exe=self._executor)
1006  else:
1007  self._skipEvents = None
1008 
1009  # Find if we have a maxEvents applied
1010  if "maxEvents" in self._executor.conf.argdict:
1011  self._maxEvents = self._executor.conf.argdict['maxEvents'].returnMyValue(exe=self._executor)
1012  if self._maxEvents == -1:
1013  self._maxEvents = None
1014  else:
1015  self._maxEvents = None
1016 
1017  # Executor substeps handling
1018  if self._executor.conf.totalExecutorSteps > 1 and self._executor.conf.executorStep < self._executor.conf.totalExecutorSteps - 1:
1019  executorEventCounts, executorEventSkips = getExecutorStepEventCounts(self._executor)
1020  self._maxEvents = executorEventCounts[self._executor.conf.executorStep]
1021  self._skipEvents = executorEventSkips[self._executor.conf.executorStep]
1022 
1023  # Global eventAcceptanceEfficiency set?
1024  if "eventAcceptanceEfficiency" in self._executor.conf.argdict:
1025  self._evAccEff = self._executor.conf.argdict['eventAcceptanceEfficiency'].returnMyValue(exe=self._executor)
1026  if (self._evAccEff is None):
1027  self._evAccEff = 0.99
1028  else:
1029  self._evAccEff = 0.99
1030 
1031  msg.debug("Event check conf: {0} {1}, {2}, {3}, {4}".format(self._inEventDict, self._outEventDict, self._skipEvents,
1032  self._maxEvents, self._evAccEff))
1033 
1034 
1035 
1036  def decide(self):
1037  # We have all that we need to proceed: input and output data, skip and max events plus any efficiency factor
1038  # So loop over the input and output data and make our checks
1039  for inData, neventsInData in self._inEventDict.items():
1040  if not isinstance(neventsInData, int):
1041  msg.warning('File size metadata for {inData} was not countable, found {neventsInData}. No event checks possible for this input data.'.format(inData=inData, neventsInData=neventsInData))
1042  continue
1043  if inData in self._eventCountConf:
1044  inDataKey = inData
1045  else:
1046  # OK, try a glob match in this case (YMMV)
1047  matchedInData = False
1048  for inDataKey in self._eventCountConf:
1049  if fnmatch.fnmatch(inData, inDataKey):
1050  msg.info("Matched input data type {inData} to {inDataKey} by globbing".format(inData=inData, inDataKey=inDataKey))
1051  matchedInData = True
1052  break
1053  if not matchedInData:
1054  msg.warning('No defined event count match for {inData} -> {outData}, so no check(s) possible in this case.'.format(inData=inData, outData=list(self._outEventDict)))
1055  continue
1056 
1057  # Now calculate the expected number of processed events for this input
1058  expectedEvents = neventsInData
1059  if self._skipEvents is not None and self._skipEvents > 0:
1060  expectedEvents -= self._skipEvents
1061  if expectedEvents < 0:
1062  msg.warning('skipEvents was set higher than the input events in {inData}: {skipEvents} > {neventsInData}. This is not an error, but it is not a normal configuration. Expected events is now 0.'.format(inData=inData, skipEvents=self._skipEvents, neventsInData=neventsInData))
1063  expectedEvents = 0
1064  if self._maxEvents is not None:
1065  if expectedEvents < self._maxEvents:
1066  if self._skipEvents is not None:
1067  msg.warning('maxEvents was set higher than inputEvents-skipEvents for {inData}: {maxEvents} > {neventsInData}-{skipEvents}. This is not an error, but it is not a normal configuration. Expected events remains {expectedEvents}.'.format(inData=inData, maxEvents=self._maxEvents, neventsInData=neventsInData, skipEvents=self._skipEvents, expectedEvents=expectedEvents))
1068  else:
1069  msg.warning('maxEvents was set higher than inputEvents for {inData}: {maxEvents} > {neventsInData}. This is not an error, but it is not a normal configuration. Expected events remains {expectedEvents}.'.format(inData=inData, maxEvents=self._maxEvents, neventsInData=neventsInData, expectedEvents=expectedEvents))
1070  else:
1071  expectedEvents = self._maxEvents
1072  msg.debug('Expected number of processed events for {0} is {1}'.format(inData, expectedEvents))
1073 
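 # Worked example (illustrative numbers): with 1000 input events, skipEvents=100
 # and maxEvents=500, expectedEvents becomes min(1000-100, 500) = 500; a "match"
 # output must then hold exactly 500 events, a "filter" output between 0 and 500.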
1074  # Loop over output data - first find event count configuration
1075  for outData, neventsOutData in self._outEventDict.items():
1076  if not isinstance(neventsOutData, int):
1077  msg.warning('File size metadata for {outData} was not countable, found "{neventsOutData}". No event checks possible for this output data.'.format(outData=outData, neventsOutData=neventsOutData))
1078  continue
1079  if outData in self._eventCountConf[inDataKey]:
1080  checkConf = self._eventCountConf[inDataKey][outData]
1081  outDataKey = outData
1082  else:
1083  # Look for glob matches
1084  checkConf = None
1085  for outDataKey, outDataConf in self._eventCountConf[inDataKey].items():
1086  if fnmatch.fnmatch(outData, outDataKey):
1087  msg.info('Matched output data type {outData} to {outDatakey} by globbing'.format(outData=outData, outDatakey=outDataKey))
1088  outDataKey = outData
1089  checkConf = outDataConf
1090  break
1091  if not checkConf:
1092  msg.warning('No defined event count match for {inData} -> {outData}, so no check possible in this case.'.format(inData=inData, outData=outData))
1093  continue
1094  msg.debug('Event count check for {inData} to {outData} is {checkConf}'.format(inData=inData, outData=outData, checkConf=checkConf))
1095 
 1096  # Do the check for this input/output combination
1097  if checkConf == 'match':
1098  # We need an exact match
1099  if neventsOutData == expectedEvents:
1100  msg.info("Event count check for {inData} to {outData} passed: all processed events found ({neventsOutData} output events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData))
1101  else:
1102  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'),
1103  'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1104  elif checkConf == 'filter':
1105  if neventsOutData <= expectedEvents and neventsOutData >= 0:
1106  msg.info("Event count check for {inData} to {outData} passed: found ({neventsOutData} output events selected from {expectedEvents} processed events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1107  else:
1108  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'),
1109  'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected from 0 to {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1110  elif checkConf == 'minEff':
1111  if neventsOutData >= int(expectedEvents * self._evAccEff) and neventsOutData <= expectedEvents:
1112  msg.info("Event count check for {inData} to {outData} passed: found ({neventsOutData} output events selected from {expectedEvents} processed events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1113  else:
1114  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'),
1115  'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected from {minEvents} to {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData,
1116  minEvents=int(expectedEvents * self._evAccEff), expectedEvents=expectedEvents))
1117  elif isinstance(checkConf, (float, int)):
1118  checkConf = float(checkConf)
1119  if checkConf < 0.0 or checkConf > 1.0:
1120  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'),
1121  'Event count check for {inData} to {outData} is misconfigured: the efficiency factor of {eff} is not between 0 and 1.'.format(inData=inData, outData=outData, eff=checkConf))
1122  if neventsOutData >= int(expectedEvents * checkConf) and neventsOutData <= expectedEvents:
1123  msg.info("Event count check for {inData} to {outData} passed: found ({neventsOutData} output events selected from {expectedEvents} processed events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1124  else:
1125  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'),
1126  'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected from {minEvents} to {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData,
1127  minEvents=int(expectedEvents * checkConf), expectedEvents=expectedEvents))
1128  else:
1129  raise trfExceptions.TransformValidationException(trfExit.nameToCode('TRF_EXEC_VALIDATION_EVENTCOUNT'),
1130  'Unrecognised event count configuration for {inData} to {outData}: "{conf}" is not known'.format(inData=inData, outData=outData, conf=checkConf))
1131  self._eventCount = expectedEvents
1132  return True
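# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the module; the logfile name is an
# assumption): scan an athena logfile and query the worst problem found.
#   report = athenaLogFileReport('log.RAWtoESD')
#   worst = report.worstError()
#   if worst['nLevel'] >= stdLogLevels['ERROR']:
#       msg.error('Log contains a serious error: %s', worst['firstError'])
# ---------------------------------------------------------------------------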