Handle a failure to process an event.
767{
768 ATH_MSG_VERBOSE(
"start of " << __FUNCTION__ <<
" with errorCode = " << errorCode
769 << ", context = " << eventContext << " eventID = " << eventContext.eventID());
770
771
772 Gaudi::Hive::setCurrentContext(eventContext);
773
774 auto returnFailureAndStopEventLoop = [this]() -> StatusCode {
776
778
779
780
781
783
784
785 return StatusCode::FAILURE;
786 };
787
788
789
790
792 ATH_MSG_ERROR(
"Failure occurred with OnlineErrorCode=" << errorCode
793 << " meaning there was a framework error before requesting a new event. No output will be produced for this event"
794 << " and the event loop will exit after all ongoing processing is finished.");
795 return returnFailureAndStopEventLoop();
796 }
798 ATH_MSG_ERROR(
"Failure occurred with OnlineErrorCode=" << errorCode
799 << " meaning a new event could not be correctly read. No output will be produced for this event."
800 << " The event loop will exit after all ongoing processing is finished.");
801 return returnFailureAndStopEventLoop();
802 }
804 ATH_MSG_ERROR(
"Failure occurred with OnlineErrorCode=" << errorCode
805 << " meaning there was a framework error after HLT result was already sent out."
806 << " The event loop will exit after all ongoing processing is finished.");
807 return returnFailureAndStopEventLoop();
808 }
810 ATH_MSG_ERROR(
"Failed to access the slot for the processed event, cannot produce output. OnlineErrorCode="
811 << errorCode << ". The event loop will exit after all ongoing processing is finished unless the failed event"
812 << " reaches a hard timeout sooner and this process is killed.");
813 return returnFailureAndStopEventLoop();
814 }
816
817
818
819 ATH_MSG_ERROR(
"Failure occurred with OnlineErrorCode=" << errorCode
820 << ". Cannot determine if the event processing started or not and whether a decision for this event will be"
821 << " produced. The event loop will exit after all ongoing processing is finished, which may include or"
822 << " not include the problematic event.");
823 return returnFailureAndStopEventLoop();
824 }
826 ATH_MSG_ERROR(
"Failure occurred with OnlineErrorCode=" << errorCode
827 << " meaning the Scheduler returned FAILURE when asked to give a finished event. Will keep trying to"
828 << " pop further events if there are any still in the scheduler, but this may keep repeating until"
829 << " this process is killed by hard timeout or other means. If all ongoing processing manages to finish"
830 << " then the event loop will exit.");
831 return returnFailureAndStopEventLoop();
832 }
833 if (!eventContext.valid()) {
834 ATH_MSG_ERROR(
"Failure occurred with an invalid EventContext. Likely there was a framework error before"
835 << " requesting a new event or after sending the result of a finished event. OnlineErrorCode=" << errorCode
836 << ". The event loop will exit after all ongoing processing is finished.");
837 return returnFailureAndStopEventLoop();
838 }
839
840
841
842
843 if (
m_whiteboard->selectStore(eventContext.slot()).isFailure()) {
845 }
846
847
848
849
850 std::string debugStreamName;
851 switch (errorCode) {
854 break;
857 break;
860 break;
861 default:
863 break;
864 }
865 eformat::helper::StreamTag debugStreamTag{debugStreamName, eformat::DEBUG_TAG, true};
866
867
868
869
870 std::unique_ptr<HLT::HLTResultMT> hltResultPtr;
871 StatusCode buildResultCode{StatusCode::SUCCESS};
873 if (hltResultRH.isValid() && !hltResultRH->getSerialisedData().empty()) {
874
875 hltResultPtr = std::make_unique<HLT::HLTResultMT>(*hltResultRH);
876 hltResultPtr->addErrorCode(errorCode);
877 buildResultCode &= hltResultPtr->addStreamTag(debugStreamTag);
878 } else {
879
880 hltResultPtr = std::make_unique<HLT::HLTResultMT>();
881 hltResultPtr->addErrorCode(errorCode);
882 buildResultCode &= hltResultPtr->addStreamTag(debugStreamTag);
883
885 buildResultCode &=
m_hltResultMaker->fillResult(*hltResultPtr,eventContext);
886 }
887 }
888
889
890 SG::WriteHandleKey<HLT::HLTResultMT> hltResultWHK(
m_hltResultRHKey.key()+
"_FailedEvent");
891 buildResultCode &= hltResultWHK.initialize();
893 if (buildResultCode.isFailure() || hltResultWH.record(std::move(hltResultPtr)).isFailure()) {
895
896 ATH_MSG_ERROR(
"Second failure to build or record the HLT Result in event store while handling a failed event. "
897 << "Cannot force-accept this event from HLT side, will rely on data collector to do this. "
898 << "The event loop will exit after all ongoing processing is finished.");
899 return returnFailureAndStopEventLoop();
900 }
901 ATH_MSG_ERROR(
"Failed to build or record the HLT Result in event store while handling a failed event. "
902 << "Trying again with skipped filling of the result contents (except debug stream tag).");
904 }
905
906
907
908
910 int64_t eventTimeMillisec = std::chrono::duration_cast<std::chrono::milliseconds>(eventTime).count();
911 auto monTimeAny = Monitored::Scalar<int64_t>("TotalTime", eventTimeMillisec);
912 auto monTimeAcc = Monitored::Scalar<int64_t>("TotalTimeAccepted", eventTimeMillisec);
913 Monitored::Group(
m_monTool, monTimeAny, monTimeAcc);
914
915
916
917
919 ATH_MSG_ERROR(
"The output conversion service failed in connectOutput() while handling a failed event. "
920 << "Cannot force-accept this event from HLT side, will rely on data collector to do this. "
921 << "The event loop will exit after all ongoing processing is finished.");
922 return returnFailureAndStopEventLoop();
923 }
924
925 DataObject* hltResultDO =
m_evtStore->accessData(hltResultWH.clid(),hltResultWH.key());
926 if (hltResultDO == nullptr) {
928
929 ATH_MSG_ERROR(
"Second failure to build or record the HLT Result in event store while handling a failed event. "
930 << "Cannot force-accept this event from HLT side, will rely on data collector to do this. "
931 << "The event loop will exit after all ongoing processing is finished.");
932 return returnFailureAndStopEventLoop();
933 }
934 ATH_MSG_ERROR(
"Failed to retrieve DataObject for the HLT result object while handling a failed event. "
935 << "Trying again with skipped filling of the result contents (except debug stream tag).");
937 }
938
939 IOpaqueAddress* addr = nullptr;
940 if (
m_outputCnvSvc->createRep(hltResultDO,addr).isFailure() || addr ==
nullptr) {
941 ATH_MSG_ERROR(
"Conversion of HLT result object to the output format failed while handling a failed event. "
942 << "Cannot force-accept this event from HLT side, will rely on data collector to do this. "
943 << "The event loop will exit after all ongoing processing is finished.");
944 delete addr;
945 addr = nullptr;
946 return returnFailureAndStopEventLoop();
947 }
948
950 ATH_MSG_ERROR(
"The output conversion service failed in commitOutput() while handling a failed event. "
951 << "Cannot force-accept this event from HLT side, will rely on data collector to do this. "
952 << "The event loop will exit after all ongoing processing is finished.");
953 delete addr;
954 addr = nullptr;
955 return returnFailureAndStopEventLoop();
956 }
957
958
959 delete addr;
960 addr = nullptr;
961
962
963
964
966
967
968
969
970
971 const EventContext eventContextCopy = eventContext;
974
975
976
980 }
981
982
983
984
985
986
990 << " was successfully handled, but the number of tolerable framework errors for this HltEventLoopMgr instance,"
991 <<
" which is " <<
m_maxFrameworkErrors.value() <<
", was exceeded. Current local event number is "
992 << eventContextCopy.evt() << ", slot " << eventContextCopy.slot()
993 << ". The event loop will exit after all ongoing processing is finished.");
994 return returnFailureAndStopEventLoop();
995 }
996 }
997
998
999 ATH_MSG_ERROR(
"Failed event with OnlineErrorCode=" << errorCode
1000 << " Current local event number is " << eventContextCopy.evt() << ", slot " << eventContextCopy.slot());
1001
1003 return StatusCode::SUCCESS;
1004}
StatusCode failedEvent(HLT::OnlineErrorCode errorCode, const EventContext &eventContext)
Handle a failure to process an event.
std::unique_ptr< HLT::LoopThread > m_inputThread
Input handling thread (triggers reading new events)
EventLoopStatus m_loopStatus
Object keeping track of the event loop status.
Gaudi::Property< std::string > m_truncationDebugStreamName
StatusCode clearWBSlot(size_t evtSlot) const
Clear an event slot in the whiteboard.
Gaudi::Property< std::string > m_timeoutDebugStreamName
Gaudi::Property< std::string > m_algErrorDebugStreamName
std::atomic< size_t > m_freeSlots
Number of free slots used to synchronise input/output tasks.
Gaudi::Property< int > m_maxFrameworkErrors
ServiceHandle< IConversionSvc > m_outputCnvSvc
Gaudi::Property< std::string > m_fwkErrorDebugStreamName
std::atomic< int > m_nFrameworkErrors
Counter of framework errors.
ToolHandle< HLTResultMTMaker > m_hltResultMaker
void resetEventTimer(const EventContext &eventContext, bool processing)
Reset the timeout flag and the timer, and mark the slot as busy or idle according to the second argument.
SG::ReadHandleKey< HLT::HLTResultMT > m_hltResultRHKey
StoreGate key for reading the HLT result.
constexpr bool isEventProcessingErrorCode(const OnlineErrorCode code)
SG::ReadCondHandle< T > makeHandle(const SG::ReadCondHandleKey< T > &key, const EventContext &ctx=Gaudi::Hive::currentContext())