ATLAS Offline Software
OnnxUtil Class Reference (final)

#include <OnnxUtil.h>

Collaboration diagram for OnnxUtil:

Public Member Functions

 OnnxUtil (const std::string &name)
 
 ~OnnxUtil ()=default
 
void initialize ()
 
void runInference (const std::vector< std::vector< float >> &node_feat, std::vector< float > &effAllJet) const
 
void runInference (const std::vector< std::vector< float >> &node_feat, std::vector< std::vector< float >> &effAllJetAllWp) const
 

Private Attributes

std::vector< std::string > m_input_node_names
 
std::vector< std::string > m_output_node_names
 
std::unique_ptr< Ort::Session > m_session
 
std::unique_ptr< Ort::Env > m_env
 
std::string m_path_to_onnx
 
int m_num_wp {}
 

Detailed Description

Definition at line 14 of file JetTagPerformanceCalibration/xAODBTaggingEfficiency/xAODBTaggingEfficiency/OnnxUtil.h.
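OnnxUtil wraps an ONNX Runtime environment and session around a b-tagging efficiency calibration model: the constructor records the model location, initialize() resolves it with PathResolverFindCalibFile and caches the input/output node names and the number of working points, and the runInference() overloads evaluate the model for a batch of jets. A minimal lifecycle sketch follows; the include path and calibration file name are illustrative assumptions, not taken from this page.

#include <vector>
#include <string>

#include "xAODBTaggingEfficiency/OnnxUtil.h"  // assumed include path

int main() {
  // hypothetical calibration file name; the constructor argument is
  // assumed to end up in m_path_to_onnx, which initialize() resolves
  // via PathResolverFindCalibFile
  OnnxUtil onnx("xAODBTaggingEfficiency/example_eff_model.onnx");

  // creates the Ort::Env and Ort::Session, caches the input/output
  // node names and m_num_wp
  onnx.initialize();

  // ready for the runInference() overloads documented below
  return 0;
}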

Constructor & Destructor Documentation

◆ OnnxUtil()

OnnxUtil::OnnxUtil(const std::string& name)

Definition at line 16 of file FlavorTagDiscriminants/Root/OnnxUtil.cxx.

  : m_env (std::make_unique<Ort::Env>(ORT_LOGGING_LEVEL_FATAL, ""))
{
  // initialize session options
  Ort::SessionOptions session_options;
  session_options.SetIntraOpNumThreads(1);

  // Ignore all non-fatal errors. This isn't a good idea, but it's
  // what we get for uploading semi-working graphs.
  session_options.SetLogSeverityLevel(4);
  session_options.SetGraphOptimizationLevel(
    GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

  // declare an allocator with default options
  Ort::AllocatorWithDefaultOptions allocator;

  // create session and load model into memory
  m_session = std::make_unique<Ort::Session>(
    *m_env, name.c_str(), session_options);

  // get metadata from the onnx model
  m_metadata = loadMetadata("gnn_config");
  m_num_inputs = m_session->GetInputCount();
  m_num_outputs = m_session->GetOutputCount();

  // get the onnx model version
  if (m_metadata.contains("onnx_model_version")) { // metadata version is explicitly set
    m_onnx_model_version = m_metadata["onnx_model_version"].get<OnnxModelVersion>();
    if (m_onnx_model_version == OnnxModelVersion::UNKNOWN) {
      throw std::runtime_error("Unknown Onnx model version!");
    }
  } else { // metadata version is not set, infer from the presence of the "outputs" key
    if (m_metadata.contains("outputs")) {
      m_onnx_model_version = OnnxModelVersion::V0;
    } else {
      throw std::runtime_error("Onnx model version not found in metadata");
    }
  }

  // get the model name
  m_model_name = determineModelName();

  // iterate over input nodes and get their names
  for (size_t i = 0; i < m_num_inputs; i++) {
    std::string input_name = m_session->GetInputNameAllocated(i, allocator).get();
    m_input_node_names.push_back(input_name);
  }

  // iterate over output nodes and get their configuration
  for (size_t i = 0; i < m_num_outputs; i++) {
    const auto name = std::string(m_session->GetOutputNameAllocated(i, allocator).get());
    const auto type = m_session->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetElementType();
    const int rank = m_session->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape().size();
    if (m_onnx_model_version == OnnxModelVersion::V0) {
      const OnnxOutput onnxOutput(name, type, m_model_name);
      m_output_nodes.push_back(onnxOutput);
    } else {
      const OnnxOutput onnxOutput(name, type, rank);
      m_output_nodes.push_back(onnxOutput);
    }
  }
}
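The constructor above relies on a loadMetadata() helper that this page does not reproduce. A plausible reconstruction using the ONNX Runtime C++ metadata API is sketched below; the free-function signature and the nlohmann::json return type are assumptions.

#include <stdexcept>
#include <string>
#include <nlohmann/json.hpp>
#include <onnxruntime_cxx_api.h>

// hypothetical sketch: read a JSON blob stored under `key` in the
// model's custom metadata map (e.g. key = "gnn_config")
nlohmann::json loadMetadata(const Ort::Session& session, const std::string& key) {
  Ort::AllocatorWithDefaultOptions allocator;
  Ort::ModelMetadata metadata = session.GetModelMetadata();
  // returns an owning string wrapper; null if the key is absent
  auto value = metadata.LookupCustomMetadataMapAllocated(key.c_str(), allocator);
  if (!value) throw std::runtime_error("metadata key not found: " + key);
  return nlohmann::json::parse(std::string(value.get()));
}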

◆ ~OnnxUtil()

OnnxUtil::~OnnxUtil() = default

Member Function Documentation

◆ initialize()

void OnnxUtil::initialize()

Definition at line 17 of file JetTagPerformanceCalibration/xAODBTaggingEfficiency/Root/OnnxUtil.cxx.

{
  std::string fullPathToFile = PathResolverFindCalibFile(m_path_to_onnx);

  // load the onnx model into memory using the path m_path_to_onnx
  m_env = std::make_unique<Ort::Env>(ORT_LOGGING_LEVEL_WARNING, "");

  // initialize session options if needed
  Ort::SessionOptions session_options;
  session_options.SetIntraOpNumThreads(1);
  session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

  // create session and load model into memory
  m_session = std::make_unique<Ort::Session>(*m_env, fullPathToFile.c_str(), session_options);
  Ort::AllocatorWithDefaultOptions allocator;

  // get the input nodes
  size_t num_input_nodes = m_session->GetInputCount();

  // iterate over all input nodes
  for (std::size_t i = 0; i < num_input_nodes; i++) {
    auto input_name = m_session->GetInputNameAllocated(i, allocator);
    m_input_node_names.emplace_back(input_name.get());
  }

  // get the output nodes
  size_t num_output_nodes = m_session->GetOutputCount();
  std::vector<int64_t> output_node_dims;

  // iterate over all output nodes
  for (std::size_t i = 0; i < num_output_nodes; i++) {
    auto output_name = m_session->GetOutputNameAllocated(i, allocator);
    m_output_node_names.emplace_back(output_name.get());

    // get output node types
    Ort::TypeInfo type_info = m_session->GetOutputTypeInfo(i);
    auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

    output_node_dims = tensor_info.GetShape();

    // output is of the shape {1, num_jets, num_wp}
    m_num_wp = output_node_dims.at(2);
  }
}
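initialize() takes m_num_wp from the last entry of the {1, num_jets, num_wp} output shape. The same inspection can be done standalone, as in the sketch below; note that ONNX Runtime reports dynamic axes such as num_jets as -1, while fixed axes like num_wp come back as concrete sizes. The model path is a placeholder.

#include <cstdint>
#include <cstdio>
#include <vector>
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "");
  Ort::SessionOptions opts;
  Ort::Session session(env, "model.onnx", opts);  // placeholder path

  // shape of the first output node; dynamic dimensions are -1
  Ort::TypeInfo type_info = session.GetOutputTypeInfo(0);
  auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
  std::vector<int64_t> dims = tensor_info.GetShape();

  // for a {1, num_jets, num_wp} output, dims.at(2) is what
  // initialize() caches in m_num_wp
  std::printf("rank=%zu num_wp=%lld\n", dims.size(),
              static_cast<long long>(dims.at(2)));
  return 0;
}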

◆ runInference() [1/2]

void OnnxUtil::runInference(const std::vector<std::vector<float>>& node_feat,
                            std::vector<float>& effAllJet) const

Definition at line 64 of file JetTagPerformanceCalibration/xAODBTaggingEfficiency/Root/OnnxUtil.cxx.

{
  // Inputs:
  //   node_feat : vector<vector<float>>
  //   effAllJet : vector<float>&

  std::vector<float> input_tensor_values;
  std::vector<int64_t> input_node_dims = {1, static_cast<int>(node_feat.size()), static_cast<int>(node_feat.at(0).size())};

  for (const auto& it : node_feat) {
    input_tensor_values.insert(input_tensor_values.end(), it.begin(), it.end());
  }

  // create input tensor object from data values
  auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
  Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_values.size(), input_node_dims.data(), input_node_dims.size());

  // convert vector<string> to vector<const char*>; this is what ORT expects
  std::vector<const char*> input_node_names(m_input_node_names.size(), nullptr);
  for (size_t i = 0; i < m_input_node_names.size(); i++) {
    input_node_names[i] = m_input_node_names.at(i).c_str();
  }
  std::vector<const char*> output_node_names(m_output_node_names.size(), nullptr);
  for (size_t i = 0; i < m_output_node_names.size(); i++) {
    output_node_names[i] = m_output_node_names.at(i).c_str();
  }

  // score the model with the input tensor and get back the output tensor.
  // Although Session::Run is non-const, the onnx authors say
  // it is safe to call from multiple threads:
  // https://github.com/microsoft/onnxruntime/discussions/10107
  Ort::Session& session ATLAS_THREAD_SAFE = *m_session;
  auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, input_node_names.size(), output_node_names.data(), output_node_names.size());

  // set the output vector values to the inference results
  float* float_ptr = output_tensors.front().GetTensorMutableData<float>();
  int num_jets = node_feat.size();
  effAllJet = {float_ptr, float_ptr + num_jets};
}
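A short usage sketch for this overload, assuming an OnnxUtil that has already been constructed and initialized as in the sketch under Detailed Description; the include path is an assumption.

#include <cstddef>
#include <cstdio>
#include <vector>
#include "xAODBTaggingEfficiency/OnnxUtil.h"  // assumed include path

// print per-jet efficiencies; node_feat holds one feature row per jet
// and must match the feature count of the model's input layer
void printEfficiencies(const OnnxUtil& onnx,
                       const std::vector<std::vector<float>>& node_feat) {
  std::vector<float> effAllJet;
  onnx.runInference(node_feat, effAllJet);  // one value per jet
  for (std::size_t i = 0; i < effAllJet.size(); ++i) {
    std::printf("jet %zu: eff = %.3f\n", i, effAllJet[i]);
  }
}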

◆ runInference() [2/2]

void OnnxUtil::runInference(const std::vector<std::vector<float>>& node_feat,
                            std::vector<std::vector<float>>& effAllJetAllWp) const

Definition at line 108 of file JetTagPerformanceCalibration/xAODBTaggingEfficiency/Root/OnnxUtil.cxx.

{
  // Inputs:
  //   node_feat      : vector<vector<float>>
  //   effAllJetAllWp : vector<vector<float>>&, shape {num_jets, num_wp}

  // using float because that's what the model expects;
  // ORT executes the type cast incorrectly (x = x.float()), so the
  // type can't be changed inside the model
  std::vector<float> input_tensor_values;
  std::vector<int64_t> input_node_dims = {1, static_cast<int>(node_feat.size()), static_cast<int>(node_feat.at(0).size())};

  for (const auto& it : node_feat) {
    input_tensor_values.insert(input_tensor_values.end(), it.begin(), it.end());
  }

  // create input tensor object from data values
  auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
  Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_values.size(), input_node_dims.data(), input_node_dims.size());

  // convert vector<string> to vector<const char*>; this is what ORT expects
  std::vector<const char*> input_node_names(m_input_node_names.size(), nullptr);
  for (size_t i = 0; i < m_input_node_names.size(); i++) {
    input_node_names[i] = m_input_node_names.at(i).c_str();
  }
  std::vector<const char*> output_node_names(m_output_node_names.size(), nullptr);
  for (size_t i = 0; i < m_output_node_names.size(); i++) {
    output_node_names[i] = m_output_node_names.at(i).c_str();
  }

  // score the model with the input tensor and get back the output tensor.
  // Although Session::Run is non-const, the onnx authors say
  // it is safe to call from multiple threads:
  // https://github.com/microsoft/onnxruntime/discussions/10107
  Ort::Session& session ATLAS_THREAD_SAFE = *m_session;
  auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, input_node_names.size(), output_node_names.data(), output_node_names.size());

  // set the output vector values to the inference results
  float* float_ptr = output_tensors.front().GetTensorMutableData<float>();

  int num_jets = node_feat.size();

  for (int i = 0; i < num_jets; i++) {
    std::vector<float> eff_one_jet_tmp;
    for (int j = 0; j < m_num_wp; j++) {
      eff_one_jet_tmp.push_back(float_ptr[i*m_num_wp + j]);
    }
    effAllJetAllWp.push_back(eff_one_jet_tmp);
  }
}
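The corresponding sketch for the per-working-point overload; effAllJetAllWp comes back with shape {num_jets, num_wp}. As above, the include path is an assumption.

#include <cstddef>
#include <cstdio>
#include <vector>
#include "xAODBTaggingEfficiency/OnnxUtil.h"  // assumed include path

// print efficiencies for every jet at every working point
void printEfficienciesAllWp(const OnnxUtil& onnx,
                            const std::vector<std::vector<float>>& node_feat) {
  std::vector<std::vector<float>> effAllJetAllWp;  // {num_jets, num_wp}
  onnx.runInference(node_feat, effAllJetAllWp);
  for (std::size_t i = 0; i < effAllJetAllWp.size(); ++i) {
    for (std::size_t j = 0; j < effAllJetAllWp[i].size(); ++j) {
      std::printf("jet %zu, wp %zu: eff = %.3f\n", i, j, effAllJetAllWp[i][j]);
    }
  }
}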

Member Data Documentation

◆ m_env

std::unique_ptr< Ort::Env > OnnxUtil::m_env [private]

◆ m_input_node_names

std::vector<std::string> OnnxUtil::m_input_node_names [private]

◆ m_num_wp

int OnnxUtil::m_num_wp {} [private]

◆ m_output_node_names

std::vector<std::string> OnnxUtil::m_output_node_names [private]

◆ m_path_to_onnx

std::string OnnxUtil::m_path_to_onnx [private]

◆ m_session

std::unique_ptr< Ort::Session > OnnxUtil::m_session [private]

The documentation for this class was generated from the following files:

JetTagPerformanceCalibration/xAODBTaggingEfficiency/xAODBTaggingEfficiency/OnnxUtil.h
JetTagPerformanceCalibration/xAODBTaggingEfficiency/Root/OnnxUtil.cxx
FlavorTagDiscriminants/Root/OnnxUtil.cxx