18from subprocess
import Popen, STDOUT, PIPE
21msg = logging.getLogger(__name__)
23from PyUtils
import RootUtils
25from PyJobTransforms.trfExeStepTools
import getExecutorStepEventCounts
30import PyJobTransforms.trfExceptions
as trfExceptions
36 if not os.access(filename, os.R_OK):
37 msg.info(
"ERROR can't access file %s", filename)
40 ROOT = RootUtils.import_root()
43 f = ROOT.TFile.Open(filename)
45 msg.info(
"Can't open file %s", filename)
50 keys = f.GetListOfKeys()
55 if not isinstance(t, ROOT.TTree):
return
57 msg.info(
"Can't get tree %s from file %s", tn, filename)
61 if (verbose): msg.info(
"Working on tree %s", tn)
62 n = t.GetEntriesFast()
66 msg.info(
"Tree %s: Found corruption in event %i", i, n)
70 if verbose
and i > 0
and i % 100 == 0:
71 msg.info(
"Checking event %s", i)
72 msg.info(
"Tree %s: %i event(s) ok", tn, n)
75 if tn ==
'CollectionTree':
80 msg.info(
"ROOT file %s looks ok", filename)
82 msg.info(
"Failed to determine number of events in file %s. No tree named 'CollectionTree'", filename)
89 cmd = [
'AtlListBSEvents',
'-c', filename]
90 p = Popen(cmd, shell=
False, stdout=PIPE, stderr=STDOUT, close_fds=
True)
91 while p.poll()
is None:
92 line = p.stdout.readline()
94 msg.info(
"AtlListBSEvents Report: %s", line.strip())
106 def __init__(self, files=['atlas_error_mask.db'], extraSearch = []):
124 for patternFile
in files:
125 if patternFile ==
"None":
127 fullName = trfUtils.findFile(os.environ[
'DATAPATH'], patternFile)
129 msg.warning(
'Error pattern file {0} could not be found in DATAPATH'.format(patternFile))
132 with open(fullName)
as patternFileHandle:
133 msg.debug(
'Opened error file {0} from here: {1}'.format(patternFile, fullName))
135 for line
in patternFileHandle:
137 if line.startswith(
'#')
or line ==
'':
141 (who, level, message) = [ s.strip()
for s
in line.split(
',', 2) ]
145 reWho = re.compile(who)
146 reMessage = re.compile(message)
148 msg.warning(
'Could not parse this line as a valid error pattern: {0}'.format(line))
150 except re.error
as e:
151 msg.warning(
'Could not parse valid regexp from {0}: {1}'.format(message, e))
154 msg.debug(
'Successfully parsed: who={0}, level={1}, message={2}'.format(who, level, message))
158 except OSError
as xxx_todo_changeme:
159 (errno, errMsg) = xxx_todo_changeme.args
160 msg.warning(
'Failed to open error pattern file {0}: {1} ({2})'.format(fullName, errMsg, errno))
164 for string
in searchStrings:
167 msg.debug(
'Successfully parsed additional logfile search string: {0}'.format(string))
168 except re.error
as e:
169 msg.warning(
'Could not parse valid regexp from {0}: {1}'.format(string, e))
177 def __init__(self, logfile=None, msgLimit=10, msgDetailLevel=stdLogLevels['ERROR']):
180 if isinstance(logfile, str):
216 def __init__(self, logfile, substepName=None, msgLimit=10, msgDetailLevel=stdLogLevels['ERROR'], ignoreList=None):
227 self.
_regExp = re.compile(
r'(?P<service>[^\s]+\w)(.*)\s+(?P<level>' +
'|'.join(stdLogLevels) +
r')\s+(?P<message>.*)')
229 self.
_metaPat = re.compile(
r"MetaData:\s+(.*?)\s*=\s*(.*)$")
237 super(athenaLogFileReport, self).
__init__(logfile, msgLimit, msgDetailLevel)
243 errorDict = {
'countSummary': {},
'details': {}}
245 errorDict[
'countSummary'][level] = count
247 errorDict[
'details'][level] = []
249 errorDict[
'details'][level].append(error)
254 for level
in list(stdLogLevels) + [
'UNKNOWN',
'IGNORED']:
271 fullName = trfUtils.findFile(os.environ[
'DATAPATH'], knowledgefile)
273 msg.warning(
'Knowledge file {0} could not be found in DATAPATH'.format(knowledgefile))
276 with open(fullName)
as knowledgeFileHandle:
277 msg.debug(
'Opened knowledge file {0} from here: {1}'.format(knowledgefile, fullName))
279 for line
in knowledgeFileHandle:
280 if line.startswith(
'#')
or line ==
'' or line ==
'\n':
282 line = line.rstrip(
'\n')
283 linesList.append(line)
285 msg.warning(
'Failed to open knowledge file {0}: {1}'.format(fullName, e))
296 msg.debug(
'Now scanning logfile {0}'.format(log))
297 seenNonStandardError =
''
298 customLogParser =
None
299 if log ==
'log.generate':
300 from EvgenProdTools.EvgenParserTool
import evgenParserTool
301 customLogParser = evgenParserTool()
304 myGen = trfUtils.lineByLine(log, substepName=self.
_substepName)
306 msg.error(
'Failed to open transform logfile {0}: {1:s}'.format(log, e))
309 self.
_errorDetails[
'ERROR'] = {
'message': str(e),
'firstLine': 0,
'count': 1}
313 for line, lineCounter
in myGen:
314 if '===>>> start processing event' in line: inEventLoop =
True
315 if 'Application Manager Stopped successfully' in line: inEventLoop =
False
318 if customLogParser
is not None:
319 customLogParser.processLine(line)
323 key, value = m.groups()
330 if 'Core dump from CoreDumpSvc' in line:
331 msg.warning(
'Detected CoreDumpSvc report - activating core dump svc grabber')
335 if 'G4Exception-START' in line:
336 msg.warning(
'Detected G4 exception report - activating G4 exception grabber')
339 if '*** G4Exception' in line:
340 msg.warning(
'Detected G4 9.4 exception report - activating G4 exception grabber')
344 if 'Shortened traceback (most recent user call last)' in line:
345 msg.warning(
'Detected python exception - activating python exception grabber')
349 if 'terminate called after throwing an instance of \'std::bad_alloc\'' in line:
350 msg.warning(
'Detected bad_alloc!')
355 if 'Error in <TFile::ReadBuffer>' in line:
359 if 'Error in <TFile::WriteBuffer>' in line:
363 if any(line
in l
for l
in nonStandardErrorsList):
364 seenNonStandardError = line
367 msg.debug(
'Non-standard line in %s: %s', log, line)
373 for matchKey
in (
'service',
'level',
'message'):
374 fields[matchKey] = m.group(matchKey)
375 msg.debug(
'Line parsed as: {0}'.format(fields))
379 if (fields[
'level'] ==
'WARNING')
and inEventLoop:
384 for ignorePat
in self.
_ignoreList.structuredPatterns:
385 serviceMatch = ignorePat[
'service'].
match(fields[
'service'])
386 levelMatch = (ignorePat[
'level'] ==
"" or ignorePat[
'level'] == fields[
'level'])
387 messageMatch = ignorePat[
'message'].
match(fields[
'message'])
388 if serviceMatch
and levelMatch
and messageMatch:
389 msg.info(
'Error message "{0}" was ignored at line {1} (structured match)'.format(line, lineCounter))
392 if ignoreFlag
is False:
394 if searchPat.search(line):
395 msg.info(
'Error message "{0}" was ignored at line {1} (search match)'.format(line, lineCounter))
400 fields[
'level'] =
'IGNORED'
406 if 'std::bad_alloc' in fields[
'message']:
407 fields[
'level'] =
'CATASTROPHE'
410 if fields[
'level'] ==
'FATAL':
411 if seenNonStandardError:
412 line +=
'; ' + seenNonStandardError
419 if fields[
'level'] ==
'IGNORED' or stdLogLevels[fields[
'level']] >= self.
_msgDetails:
421 detailsHandled =
False
423 if seenError[
'message'] == line:
424 seenError[
'count'] += 1
425 detailsHandled =
True
427 if detailsHandled
is False:
428 self.
_errorDetails[fields[
'level']].append({
'message': line,
'firstLine': lineCounter,
'count': 1})
430 msg.warning(
"Found message number {0} at level {1} - this and further messages will be supressed from the report".format(self.
_levelCounter[fields[
'level']], fields[
'level']))
434 if 'Total payload read from IOVDb' in fields[
'message']:
435 msg.debug(
"Found COOL payload information at line {0}".format(line))
436 a = re.match(
r'(\D+)(?P<bytes>\d+)(\D+)(?P<time>\d+[.]?\d*)(\D+)', fields[
'message'])
437 self.
_dbbytes += int(a.group(
'bytes'))
438 self.
_dbtime += float(a.group(
'time'))
440 if customLogParser
is not None:
441 customLogParser.report()
450 worst = stdLogLevels[
'DEBUG']
453 if count > 0
and stdLogLevels.get(lvl, 0) > worst:
455 worst = stdLogLevels[lvl]
461 return {
'level': worstName,
'nLevel': worst,
'firstError': firstError}
465 firstLine = firstError =
None
466 firstLevel = stdLogLevels[floor]
469 if (count > 0
and stdLogLevels.get(lvl, 0) >= stdLogLevels[floor]
and
470 (firstError
is None or self.
_errorDetails[lvl][0][
'firstLine'] < firstLine)):
472 firstLevel = stdLogLevels[lvl]
476 return {
'level': firstName,
'nLevel': firstLevel,
'firstError': firstError}
479 eventLoopWarnings = []
481 if item
in [element[
'item']
for element
in eventLoopWarnings]:
484 eventLoopWarnings.append({
'item':item,
'count': count})
485 return eventLoopWarnings
487 def moreDetails(self, log, firstline, firstLineCount, knowledgeFile, offset=0):
491 linesToBeScanned = 50
492 seenAbnormalLines = []
493 abnormalLinesReport = {}
494 lastNormalLineReport = {}
497 myGen = trfUtils.lineByLine(log)
498 for line, linecounter
in myGen:
499 if linecounter
in range(firstLineCount - linesToBeScanned, firstLineCount-offset):
500 linesList.append([linecounter, line])
501 elif linecounter == firstLineCount:
504 for linecounter, line
in reversed(linesList):
505 if re.findall(
r'|'.join(abnormalLinesList), line):
507 for dic
in seenAbnormalLines:
509 if dic[
'message'] == line
or dic[
'message'][0:15] == line[0:15]:
513 if seenLine
is False:
514 seenAbnormalLines.append({
'message': line,
'firstLine': linecounter,
'count': 1})
517 lastNormalLineReport = {
'message': line,
'firstLine': linecounter,
'count': 1}
526 for a
in range(len(seenAbnormalLines)):
527 abnormalLinesReport.update({
'message{0}'.format(a): seenAbnormalLines[a][
'message'],
'firstLine{0}'.format(a): seenAbnormalLines[a][
'firstLine'],
528 'count{0}'.format(a): seenAbnormalLines[a][
'count']})
530 return {
'abnormalLines': abnormalLinesReport,
'lastNormalLine': lastNormalLineReport}
540 _eventCounter = _run = _event = _currentAlgorithm = _functionLine = _currentFunction =
None
541 coreDumpReport =
'Core dump from CoreDumpSvc'
544 coreDumpDetailsReport = {}
546 for line, linecounter
in lineGenerator:
549 if 'Caught signal 11(Segmentation fault)' in line:
550 coreDumpReport =
'Segmentation fault'
551 if 'Event counter' in line:
555 if 'EventID' in line:
556 match = re.findall(
r'\[.*?\]', line)
557 if match
and match.__len__() >= 2:
560 keys = (match[0].
strip(brackets)).
split(commaDelimer)
561 values = (match[1].
strip(brackets)).
split(commaDelimer)
564 _run =
'Run: ' + values[keys.index(
'Run')]
567 _event =
'Evt: ' + values[keys.index(
'Evt')]
569 if 'Current algorithm' in line:
570 _currentAlgorithm = line
571 if '<signal handler called>' in line:
572 _functionLine = linecounter+1
573 if _functionLine
and linecounter
is _functionLine:
575 _currentFunction =
'Current Function: ' + line.split(
' in ')[1].
split()[0]
577 _currentFunction =
'Current Function: ' + line.split()[1]
585 _eventCounter =
'Event counter: unknown' if not _eventCounter
else _eventCounter
586 _run =
'Run: unknown' if not _run
else _run
587 _event =
'Evt: unknown' if not _event
else _event
588 _currentAlgorithm =
'Current algorithm: unknown' if not _currentAlgorithm
else _currentAlgorithm
589 _currentFunction =
'Current Function: unknown' if not _currentFunction
else _currentFunction
590 coreDumpReport =
'{0}: {1}; {2}; {3}; {4}; {5}'.format(coreDumpReport, _eventCounter, _run, _event, _currentAlgorithm, _currentFunction)
592 coreDumpDetailsReport = self.
moreDetails(log, firstline, firstLineCount,
'knowledgeFile.db', offset)
593 abnormalLines = coreDumpDetailsReport[
'abnormalLines']
596 if 'message0' in abnormalLines.keys():
597 coreDumpReport +=
'; Abnormal line seen just before core dump: ' + abnormalLines[
'message0'][0:30] +
'...[truncated] ' +
'(see the jobReport)'
600 msg.debug(
'Identified core dump - adding to error detail report')
602 self.
_errorDetails[
'FATAL'].append({
'moreDetails': coreDumpDetailsReport,
'message': coreDumpReport,
'firstLine': firstLineCount,
'count': 1})
608 if 'Aborting execution' not in g4Report:
609 for line, linecounter
in lineGenerator:
610 g4Report += os.linesep + line
616 msg.warning(
'G4 exception closing string not found within {0} log lines of line {1}'.format(g4lines, firstLineCount))
620 msg.debug(
'Identified G4 exception - adding to error detail report')
621 if "just a warning" in g4Report:
624 self.
_errorDetails[
'WARNING'].append({
'message': g4Report,
'firstLine': firstLineCount,
'count': 1})
626 msg.warning(
"Found message number {0} at level WARNING - this and further messages will be supressed from the report".format(self.
_levelCounter[
'WARNING']))
629 self.
_errorDetails[
'FATAL'].append({
'message': g4Report,
'firstLine': firstLineCount,
'count': 1})
634 for line, linecounter
in lineGenerator:
635 g4Report += os.linesep + line
638 if 'G4Exception-END' in line:
640 if g4lines >= g4ExceptionLineDepth:
641 msg.warning(
'G4 exception closing string not found within {0} log lines of line {1}'.format(g4lines, firstLineCount))
645 msg.debug(
'Identified G4 exception - adding to error detail report')
646 if "-------- WWWW -------" in g4Report:
649 self.
_errorDetails[
'WARNING'].append({
'message': g4Report,
'firstLine': firstLineCount,
'count': 1})
651 msg.warning(
"Found message number {0} at level WARNING - this and further messages will be supressed from the report".format(self.
_levelCounter[
'WARNING']))
654 self.
_errorDetails[
'FATAL'].append({
'message': g4Report,
'firstLine': firstLineCount,
'count': 1})
658 pythonExceptionReport =
""
660 lastLine2 = firstline
661 pythonErrorLine = firstLineCount
663 for line, linecounter
in lineGenerator:
664 if 'Py:Athena' in line
and 'INFO leaving with code' in line:
666 pythonExceptionReport = lastLine
667 pythonErrorLine = linecounter-1
669 pythonExceptionReport = lastLine2
670 pythonErrorLine = linecounter-2
673 msg.warning(
'Could not identify python exception correctly scanning {0} log lines after line {1}'.format(pyLines, firstLineCount))
674 pythonExceptionReport =
"Unable to identify specific exception"
675 pythonErrorLine = firstLineCount
681 pythonExceptionDetailsReport = self.
moreDetails(log, firstline, firstLineCount,
'knowledgeFile.db')
682 abnormalLines = pythonExceptionDetailsReport[
'abnormalLines']
685 if 'message0' in abnormalLines.keys():
686 pythonExceptionReport +=
'; Abnormal line seen just before python exception: ' + abnormalLines[
'message0'][0:30] +
'...[truncated] ' +
'(see the jobReport)'
688 msg.debug(
'Identified python exception - adding to error detail report')
690 self.
_errorDetails[
'FATAL'].append({
'moreDetails': pythonExceptionDetailsReport,
'message': pythonExceptionReport,
'firstLine': pythonErrorLine,
'count': 1})
694 badAllocExceptionReport =
'terminate after \'std::bad_alloc\'.'
696 msg.debug(
'Identified bad_alloc - adding to error detail report')
698 self.
_errorDetails[
'CATASTROPHE'].append({
'message': badAllocExceptionReport,
'firstLine': firstLineCount,
'count': 1})
701 msg.debug(
'Identified ROOT IO problem - adding to error detail report')
703 self.
_errorDetails[
'FATAL'].append({
'message': firstline,
'firstLine': firstLineCount,
'count': 1})
710 def __init__(self, logfile=None, msgLimit=200, msgDetailLevel=stdLogLevels['ERROR']):
714 super(scriptLogFileReport, self).
__init__(logfile, msgLimit, msgDetailLevel)
718 for level
in list(stdLogLevels) + [
'UNKNOWN',
'IGNORED']:
730 msg.info(
'Scanning logfile {0}'.format(log))
732 myGen = trfUtils.lineByLine(log)
734 msg.error(
'Failed to open transform logfile {0}: {1:s}'.format(log, e))
737 self.
_errorDetails[
'ERROR'] = {
'message': str(e),
'firstLine': 0,
'count': 1}
740 for line, lineCounter
in myGen:
744 if line.__contains__(
'Error in <TFile::ReadBuffer>')
or \
745 line.__contains__(
'Error in <TFile::WriteBuffer>'):
750 worstlevelName =
'DEBUG'
751 worstLevel = stdLogLevels[worstlevelName]
753 if count > 0
and stdLogLevels.get(levelName, 0) > worstLevel:
754 worstlevelName = levelName
755 worstLevel = stdLogLevels[levelName]
762 return {
'level': worstlevelName,
'nLevel': worstLevel,
'firstError': firstError}
768 msg.debug(
'Identified ROOT IO problem - adding to error detail report')
770 self.
_errorDetails[
'FATAL'].append({
'message': line,
'firstLine': lineCounter,
'count': 1})
778 except Exception
as exception:
779 msg.error(
'Failed to import module PyJobTransforms.trfFileValidationFunctions with error {error}'.format(error = exception))
782 import multiprocessing
784 level = kwargs.get(
'level')
785 if level
is not None:
786 if level < msg.getEffectiveLevel():
788 msg.debug(f
"Set logging level of {msg.name!r} to {logging.getLevelName(level)!r}")
790 msg.debug(f
"Current process: {multiprocessing.current_process().name}")
792 validationFunction = getattr(trfFileValidationFunctions, functionName)
793 msg.debug(f
"Calling {validationFunction.__name__}({file}, "
794 f
"{",
".join(f"{k}={v}
" for k, v in kwargs.items())})")
795 return validationFunction(file, **kwargs)
803 if multithreadedMode:
804 os.environ[
'TRF_MULTITHREADED_VALIDATION'] =
'TRUE'
805 if parallelMode
is False:
806 msg.info(
'Starting legacy (serial) file validation')
807 for (key, arg)
in dictionary.items():
808 if not isinstance(arg, argFile):
812 if arg.auxiliaryFile:
815 msg.info(
'Validating data type %s...', key)
817 for fname
in arg.value:
818 msg.info(
'Validating file %s...', fname)
821 msg.info(
'{0}: Testing corruption...'.format(fname))
822 if arg.getSingleMetadata(fname,
'integrity')
is True:
823 msg.info(
'Corruption test passed.')
824 elif arg.getSingleMetadata(fname,
'integrity')
is False:
825 msg.error(
'Corruption test failed.')
827 elif arg.getSingleMetadata(fname,
'integrity') ==
'UNDEFINED':
828 msg.info(
'No corruption test defined.')
829 elif arg.getSingleMetadata(fname,
'integrity')
is None:
830 msg.error(
'Could not check for file integrity')
833 msg.error(
'Unknown rc from corruption test.')
837 msg.info(
'{0}: Testing event count...'.format(fname))
838 if arg.getSingleMetadata(fname,
'nentries')
is not None:
839 msg.info(
'Event counting test passed ({0!s} events).'.format(arg.getSingleMetadata(fname,
'nentries')))
841 msg.error(
'Event counting test failed.')
845 msg.info(
'{0}: Checking if guid exists...'.format(fname))
846 if arg.getSingleMetadata(fname,
'file_guid')
is None:
847 msg.error(
'Guid could not be determined.')
849 elif arg.getSingleMetadata(fname,
'file_guid') ==
'UNDEFINED':
850 msg.info(
'Guid not defined.')
852 msg.info(
'Guid is %s', arg.getSingleMetadata(fname,
'file_guid'))
853 msg.info(
'Stopping legacy (serial) file validation')
854 elif parallelMode
is True:
855 msg.info(
'Starting parallel file validation')
861 integrityFunctionList = []
865 msg.debug(
'Collating list of files for validation')
866 for (key, arg)
in dictionary.items():
867 if not isinstance(arg, argFile):
871 for fname
in arg.value:
872 msg.debug(
'Appending file {fileName} to list of files for validation'.format(fileName = str(fname)))
874 fileList.append(fname)
882 integrityFunctionList.append(arg.integrityFunction)
883 except AttributeError
as e:
884 errmsg = f
'Validation function for file {fname} of type'\
885 f
' {type(arg).__name__!r} not available for parallel file validation: {e}'
888 trfExit.nameToCode(
'TRF_EXEC_VALIDATION_FAIL'), errmsg)
894 name =
"validation of file {fileName}".format(
895 fileName = str(fname)),
896 workFunction = returnIntegrityOfFile,
897 workFunctionKeywordArguments = {
899 'functionName': arg.integrityFunction,
900 'level': msg.getEffectiveLevel(),
902 workFunctionTimeout = 600
909 name =
"standard file validation",
915 msg.info(
'Submitting file validation jobs to parallel job processor')
916 parallelJobProcessor1.submit(jobSubmission = jobGroup1)
917 resultsList = parallelJobProcessor1.getResults()
918 msg.info(
'Parallel file validation complete')
921 msg.info(
'Processing file integrity results')
922 for currentFile, currentArg, currentIntegrityFunction, currentResult
in zip(fileList, argList, integrityFunctionList, resultsList):
923 msg.info(
'{IO} file {fileName} has integrity status {integrityStatus} as determined by integrity function {integrityFunction}'.format(
925 fileName = str(currentFile),
926 integrityStatus = str(currentResult),
927 integrityFunction = str(currentIntegrityFunction)
932 if currentResult[0]
is True:
933 msg.info(
'Updating integrity metadata for file {fileName}'.format(fileName = str(currentFile)))
934 currentArg._setMetadata(files=[currentFile,], metadataKeys={
'integrity': currentResult[0]})
936 exceptionMessage =
"{IO} file validation failure on file {fileName} with integrity status {integrityStatus} as determined by integrity function {integrityFunction}".format(
938 fileName = str(currentFile),
939 integrityStatus = str(currentResult),
940 integrityFunction = str(currentIntegrityFunction)
942 msg.error(
"exception message: {exceptionMessage}".format(
943 exceptionMessage = exceptionMessage
945 exitCodeName =
'TRF_OUTPUT_FILE_VALIDATION_FAIL'
947 trfExit.nameToCode(exitCodeName),
952 if currentArg.getSingleMetadata(currentFile, metadataKey =
'integrity', populate =
False) == currentResult[0]:
953 msg.debug(
"file integrity metadata update successful")
955 msg.error(
"file integrity metadata update unsuccessful")
957 metadataKeys = (
'nentries',
'file_guid')
958 msg.info(f
"{",
".join(fileList)}: Checking {",
".join(map(repr, metadataKeys))} ...")
959 metadata = {fname: arg.getMetadata(fname, metadataKeys=metadataKeys)[fname]
960 for fname, arg
in zip(fileList, argList, strict=
True)}
961 success = {fname: md
for fname, md
in metadata.items()
if None not in md.values()}
963 msg.info(f
"Checked\n\t{"\n\t
".join(
964 f"{fname}: {" ".join(f"{k}={v}
" for k, v in md.items())}"
965 for fname, md
in success.items())}
")
966 if len(success) != len(metadata):
967 errmsg = f
"{",
".join(fname for fname in metadata if fname not in success)}:" \
968 f
" Could not determine '{"' and/or '".join(metadataKeys)}'"
971 msg.info(
'Stopping parallel file validation')
984 def __init__(self, executor, eventCountConf=None, eventCountConfOverwrite=False):
1001 self.
_eventCountConf[
'EVNT'] = {
'EVNT_MRG':
"match",
"HITS": simEventEff,
"EVNT_TR":
"filter",
"DAOD_TRUTH*" :
"match"}
1003 self.
_eventCountConf[
'HITS'] = {
'RDO':
"match",
'HITS_RSM': simEventEff,
"HITS_MRG":
"match",
'HITS_FILT': simEventEff,
"RDO_FILT":
"filter",
"DAOD_TRUTH*" :
"match",
"HIST_SIM" :
"match"}
1004 self.
_eventCountConf[
'BS'] = {
'ESD':
"match",
'DRAW_*':
"filter",
'NTUP_*':
"filter",
"BS_MRG":
"match",
'DESD*':
"filter",
'AOD':
"match",
'DAOD*':
"filter",
"DAOD_PHYS":
"match",
"DAOD_PHYSLITE":
"match"}
1005 self.
_eventCountConf[
'RDO*'] = {
'ESD':
"match",
'DRAW_*':
"filter",
'NTUP_*':
"filter",
"RDO_MRG":
"match",
"RDO_TRIG":
"match",
'AOD':
"match",
'DAOD*':
"filter",
"DAOD_PHYS":
"match",
"DAOD_PHYSLITE":
"match",
"HIST_DIGI":
"match"}
1006 self.
_eventCountConf[
'ESD'] = {
'ESD_MRG':
"match",
'AOD':
"match",
'DESD*':
"filter",
'DAOD_*':
"filter",
'NTUP_*':
"filter",
"DAOD_PHYS":
"match",
"DAOD_PHYSLITE":
"match"}
1007 self.
_eventCountConf[
'AOD'] = {
'AOD_MRG' :
"match",
'TAG':
"match",
"NTUP_*":
"filter",
"DAOD_*":
"filter",
"DAOD_PHYS":
"match",
"DAOD_PHYSLITE":
"match"}
1019 if eventCountConfOverwrite
is True:
1024 msg.debug(
'Event count check configuration is: {0}'.format(self.
_eventCountConf))
1026 msg.debug(
'Event count check ready for executor {0}'.format(self.
_executor.name))
1041 msg.info(
'Overriding check configuration with: {0}'.format(override))
1050 for dataTypeName
in self.
_executor.input:
1053 msg.debug(
'Input data type {0} has {1} events'.format(dataTypeName, self.
_inEventDict[dataTypeName]))
1055 msg.warning(
'Found no dataDictionary entry for input data type {0}'.format(dataTypeName))
1059 for dataTypeName
in self.
_executor.output:
1062 msg.debug(
'Output data type {0} has {1} events'.format(dataTypeName, self.
_outEventDict[dataTypeName]))
1064 msg.warning(
'Found no dataDictionary entry for output data type {0}'.format(dataTypeName))
1067 if "skipEvents" in self.
_executor.conf.argdict:
1073 if "maxEvents" in self.
_executor.conf.argdict:
1082 executorEventCounts, executorEventSkips = getExecutorStepEventCounts(self.
_executor)
1087 if "eventAcceptanceEfficiency" in self.
_executor.conf.argdict:
1102 for inData, neventsInData
in self.
_inEventDict.items():
1103 if not isinstance(neventsInData, int):
1104 msg.warning(
'File size metadata for {inData} was not countable, found {neventsInData}. No event checks possible for this input data.'.format(inData=inData, neventsInData=neventsInData))
1110 matchedInData =
False
1112 if fnmatch.fnmatch(inData, inDataKey):
1113 msg.info(
"Matched input data type {inData} to {inDataKey} by globbing".format(inData=inData, inDataKey=inDataKey))
1114 matchedInData =
True
1116 if not matchedInData:
1117 msg.warning(
'No defined event count match for {inData} -> {outData}, so no check(s) possible in this case.'.format(inData=inData, outData=list(self.
_outEventDict)))
1121 expectedEvents = neventsInData
1124 if expectedEvents < 0:
1125 msg.warning(
'skipEvents was set higher than the input events in {inData}: {skipEvents} > {neventsInData}. This is not an error, but it is not a normal configuration. Expected events is now 0.'.format(inData=inData, skipEvents=self.
_skipEvents, neventsInData=neventsInData))
1130 msg.warning(
'maxEvents was set higher than inputEvents-skipEvents for {inData}: {maxEvents} > {neventsInData}-{skipEvents}. This is not an error, but it is not a normal configuration. Expected events remains {expectedEvents}.'.format(inData=inData, maxEvents=self.
_maxEvents, neventsInData=neventsInData, skipEvents=self.
_skipEvents, expectedEvents=expectedEvents))
1132 msg.warning(
'maxEvents was set higher than inputEvents for {inData}: {maxEvents} > {neventsInData}. This is not an error, but it is not a normal configuration. Expected events remains {expectedEvents}.'.format(inData=inData, maxEvents=self.
_maxEvents, neventsInData=neventsInData, expectedEvents=expectedEvents))
1135 msg.debug(
'Expected number of processed events for {0} is {1}'.format(inData, expectedEvents))
1139 if not isinstance(neventsOutData, int):
1140 msg.warning(
'File size metadata for {outData} was not countable, found "{neventsOutData}". No event checks possible for this output data.'.format(outData=outData, neventsOutData=neventsOutData))
1144 outDataKey = outData
1148 for outDataKey, outDataConf
in self.
_eventCountConf[inDataKey].items():
1149 if fnmatch.fnmatch(outData, outDataKey):
1150 msg.info(
'Matched output data type {outData} to {outDatakey} by globbing'.format(outData=outData, outDatakey=outDataKey))
1151 outDataKey = outData
1152 checkConf = outDataConf
1155 msg.warning(
'No defined event count match for {inData} -> {outData}, so no check possible in this case.'.format(inData=inData, outData=outData))
1157 msg.debug(
'Event count check for {inData} to {outData} is {checkConf}'.format(inData=inData, outData=outData, checkConf=checkConf))
1160 if checkConf ==
'match':
1162 if neventsOutData == expectedEvents:
1163 msg.info(
"Event count check for {inData} to {outData} passed: all processed events found ({neventsOutData} output events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData))
1166 'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1167 elif checkConf ==
'filter':
1168 if neventsOutData <= expectedEvents
and neventsOutData >= 0:
1169 msg.info(
"Event count check for {inData} to {outData} passed: found ({neventsOutData} output events selected from {expectedEvents} processed events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1172 'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected from 0 to {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1173 elif checkConf ==
'minEff':
1174 if neventsOutData >= int(expectedEvents * self.
_evAccEff)
and neventsOutData <= expectedEvents:
1175 msg.info(
"Event count check for {inData} to {outData} passed: found ({neventsOutData} output events selected from {expectedEvents} processed events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1178 'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected from {minEvents} to {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData,
1179 minEvents=int(expectedEvents * self.
_evAccEff), expectedEvents=expectedEvents))
1180 elif isinstance(checkConf, (float, int)):
1181 checkConf = float(checkConf)
1182 if checkConf < 0.0
or checkConf > 1.0:
1184 'Event count check for {inData} to {outData} is misconfigured: the efficiency factor of {eff} is not between 0 and 1.'.format(inData=inData, outData=outData, eff=checkConf))
1185 if neventsOutData >= int(expectedEvents * checkConf)
and neventsOutData <= expectedEvents:
1186 msg.info(
"Event count check for {inData} to {outData} passed: found ({neventsOutData} output events selected from {expectedEvents} processed events)".format(inData=inData, outData=outData, neventsOutData=neventsOutData, expectedEvents=expectedEvents))
1189 'Event count check for {inData} to {outData} failed: found {neventsOutData} events, expected from {minEvents} to {expectedEvents}'.format(inData=inData, outData=outData, neventsOutData=neventsOutData,
1190 minEvents=int(expectedEvents * checkConf), expectedEvents=expectedEvents))
1193 'Unrecognised event count configuration for {inData} to {outData}: "{conf}" is not known'.format(inData=inData, outData=outData, conf=checkConf))
JobGroup: a set of Job objects and pieces of information relevant to a given set of Job objects.
Job: a set of pieces of information relevant to a given work function.
ParallelJobProcessor: a multiple-process processor of Job objects.
Logfile suitable for scanning logfiles with an athena flavour, i.e., lines of the form "SERVICE LOGL...
firstError(self, floor='ERROR')
Return the first error found in the logfile above a certain loglevel.
rootSysErrorParser(self, lineGenerator, firstline, firstLineCount)
dbMonitor(self)
Return data volume and time spend to retrieve information from the database.
__init__(self, logfile, substepName=None, msgLimit=10, msgDetailLevel=stdLogLevels['ERROR'], ignoreList=None)
Class constructor.
badAllocExceptionParser(self, lineGenerator, firstline, firstLineCount)
knowledgeFileHandler(self, knowledgefile)
Generally, a knowledge file consists of non-standard logging error/abnormal lines which are left out ...
moreDetails(self, log, firstline, firstLineCount, knowledgeFile, offset=0)
scanLogFile(self, resetReport=False)
g494ExceptionParser(self, lineGenerator, firstline, firstLineCount)
worstError(self)
Return the worst error found in the logfile (first error of the most serious type)
pythonExceptionParser(self, log, lineGenerator, firstline, firstLineCount)
coreDumpSvcParser(self, log, lineGenerator, firstline, firstLineCount)
Attempt to suck a core dump report from the current logfile This function scans logs in two different...
g4ExceptionParser(self, lineGenerator, firstline, firstLineCount, g4ExceptionLineDepth)
Small class used for validating event counts between input and output files.
__init__(self, executor, eventCountConf=None, eventCountConfOverwrite=False)
check in- and output event counts
decide(self)
Perform an event count check.
configureCheck(self, override=False)
Setup the parameters needed to define particular checks.
Class of patterns that can be ignored from athena logfiles.
_initialiseSerches(self, searchStrings=[])
__init__(self, files=['atlas_error_mask.db'], extraSearch=[])
Load error patterns from files.
_initalisePatterns(self, files)
A class holding report information from scanning a logfile This is pretty much a virtual class,...
__init__(self, logfile=None, msgLimit=10, msgDetailLevel=stdLogLevels['ERROR'])
scanLogFile(self, resetReport=False)
__init__(self, logfile=None, msgLimit=200, msgDetailLevel=stdLogLevels['ERROR'])
rootSysErrorParser(self, line, lineCounter)
void search(TDirectory *td, const std::string &s, std::string cwd, node *n)
recursive directory search for TH1 and TH2 and TProfiles
int count(std::string s, const std::string &regx)
count how many occurrences of a regx are in a string
std::vector< std::string > split(const std::string &s, const std::string &t=":")
bool match(std::string s1, std::string s2)
match the individual directories of two strings
corruptionTestBS(filename)
performStandardFileValidation(dictionary, io, parallelMode=False, multithreadedMode=False)
perform standard file validation @ detail This method performs standard file validation in either ser...
returnIntegrityOfFile(file, functionName, **kwargs)
return integrity of file using appropriate validation function @ detail This method returns the integ...
corruptionTestPool(filename, verbose=False)