diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 215cfafb2d174..28f2204bacf1d 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -77,7 +77,20 @@ BackendManager::BackendManager(SessionContext& session_context, ptr_stream_t model_stream; std::unique_ptr model_proto; if (subgraph_context_.is_ep_ctx_graph) { - model_stream = ep_ctx_handle_.GetModelBlobStream(session_context_.so_context_file_path, subgraph); + std::string filename; + if (!session_context_.so_context_file_path.empty()) { + filename = session_context_.so_context_file_path.filename().string(); + } else if (!session_context_.onnx_model_path_name.empty()) { + filename = session_context_.onnx_model_path_name.filename().string(); + } else { + ORT_THROW("Either Session_options ep.context_file_path or model path must be specified"); + } + std::string model_name = onnxruntime::openvino_ep::BackendManager::stripAfterFirstDot(filename); + auto subgraph_name = model_name + "_" + subgraph_context_.subgraph_name; + model_stream = ep_ctx_handle_.GetModelBlobStream(shared_context_, + session_context_.so_context_file_path, + subgraph_name, + subgraph); } else { model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger); } @@ -98,6 +111,8 @@ BackendManager::BackendManager(SessionContext& session_context, sw.mapped_weights = std::make_unique(weight_filename); } backend_utils::CreateOVTensors(session_context_.device_type, sw.metadata, *sw.mapped_weights); + } else { + ORT_THROW(" External weight file is not found "); } } @@ -198,6 +213,19 @@ BackendManager::BackendManager(SessionContext& session_context, } } +std::string BackendManager::stripAfterFirstDot(std::string filename) { + size_t dotPos = filename.find('.'); // Find first dot + size_t ctxPos = filename.find("_ctx"); // Find first dot + if (dotPos == std::string::npos && ctxPos == 
std::string::npos) { + return filename; // No dot found, return full filename + } + if (dotPos != std::string::npos) + filename = filename.substr(0, dotPos); // strip everything after first dot + if (ctxPos != std::string::npos) + filename = filename.substr(0, ctxPos); // strip everything after _ctx + return filename; +} + // Call EPContext model exporter here if the provider option for exporting // precompiled blob is set. If that's the case: // By default, create model in embed mode where the blob stream is exported as data within @@ -215,7 +243,35 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie // If not embed_mode, dump the blob here and only pass on the path to the blob std::string model_blob_str; auto compiled_model = concrete_backend_->GetOVCompiledModel(); - if (session_context_.so_context_embed_mode) { // Internal blob + if (session_context_.so_share_ep_contexts) { + std::ostringstream model_blob_stream; + compiled_model.export_model(model_blob_stream); + + auto& subgraph_metadata = shared_context_.shared_weights.subgraph_metadata; + std::string filename = ""; + if (!session_context_.so_context_file_path.empty()) { + filename = session_context_.so_context_file_path.filename().string(); + } else if (!session_context_.onnx_model_path_name.empty()) { + filename = session_context_.onnx_model_path_name.filename().string(); + } else { + ORT_THROW("Either Session_options ep.context_file_path or model path must be specified"); + } + std::string model_name = onnxruntime::openvino_ep::BackendManager::stripAfterFirstDot(filename); + auto subgraph_name = model_name + "_" + subgraph_context_.subgraph_name; + sw::SubgraphMetadata::Map::key_type key{subgraph_name}; + sw::SubgraphMetadata::Map::mapped_type value{}; + + auto& bin_file = shared_context_.shared_weights.shared_bin_file.bin_file_; + if (!subgraph_metadata.contains(key) && bin_file.is_open()) { + value.epctx_offset = static_cast(bin_file.tellp()); + bin_file << 
model_blob_stream.str(); + value.epctx_length = static_cast(static_cast(bin_file.tellp()) - value.epctx_offset); + subgraph_metadata.emplace(key, std::move(value)); + } + + model_blob_str = shared_context_.shared_weights.shared_bin_file.shared_bin_filename.filename().string(); + } else if (session_context_.so_context_embed_mode) { + // Internal blob std::ostringstream model_blob_stream; compiled_model.export_model(model_blob_stream); model_blob_str = std::move(model_blob_stream).str(); diff --git a/onnxruntime/core/providers/openvino/backend_manager.h b/onnxruntime/core/providers/openvino/backend_manager.h index cdc27701ec2e6..57da7e73d3a0b 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.h +++ b/onnxruntime/core/providers/openvino/backend_manager.h @@ -47,6 +47,8 @@ class BackendManager { ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_proto, const std::vector>& input_shapes); + std::string stripAfterFirstDot(std::string filename); + std::unique_ptr model_proto_; std::shared_ptr concrete_backend_; std::map> backend_map_; diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 2ee5e9ec3e3a9..06270d16a5ca5 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -19,105 +19,6 @@ using Exception = ov::Exception; namespace onnxruntime { namespace openvino_ep { - -SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename) : file_(filename, std::ios::in | std::ios::binary) { - try { - file_.exceptions(std::ifstream::failbit | std::ifstream::badbit); - weights_size_ = file_.seekg(0, std::ios::end).tellg(); - } catch (std::ifstream::failure& e) { - ORT_THROW("Error: Failed to open weight file at ", filename.string(), " ", e.what()); - } -} - -void SharedContext::SharedWeights::WeightsFile::load_weights(size_t file_offset, void* data, size_t size) { - ORT_ENFORCE(file_offset < 
weights_size_ && size <= weights_size_ && (file_offset <= weights_size_ - size), "Error: File offset is out of bounds."); - file_.seekg(file_offset); - file_.read(reinterpret_cast(data), size); -} - -std::ostream& operator<<(std::ostream& stream, const SharedContext::SharedWeights::Metadata::Map& metadata) { - try { - stream << metadata.size(); - - // Write each key-value pair - // Put elements in separate lines to facilitate reading - for (const auto& [key, value] : metadata) { - stream << std::endl - << key.name; - stream << std::endl - << value.location; - stream << std::endl - << value.data_offset; - stream << std::endl - << value.size; - stream << std::endl - << value.dimensions.size(); - for (const auto& dim : value.dimensions) { - stream << std::endl - << dim; - } - stream << std::endl - << value.element_type; - } - } catch (const Exception& e) { - ORT_THROW("Error: Failed to write map data.", e.what()); - } catch (...) { - ORT_THROW("Error: Failed to write map data."); - } - - ORT_ENFORCE(stream.good(), "Error: Failed to write map data."); - return stream; -} - -std::istream& operator>>(std::istream& stream, SharedContext::SharedWeights::Metadata::Map& metadata) { - size_t map_size{0}; - try { - stream >> map_size; - - while (!stream.eof()) { - SharedContext::SharedWeights::Metadata::Key key; - SharedContext::SharedWeights::Metadata::Value value; - stream >> key.name; - stream >> value.location; - stream >> value.data_offset; - stream >> value.size; - size_t num_dimensions; - stream >> num_dimensions; - - if (stream.fail()) { - ORT_THROW("Error: Failed to read num_dimensions from stream."); - } - - constexpr size_t MAX_SAFE_DIMENSIONS = 1024; - - size_t safe_num_dimensions = num_dimensions; - - if (num_dimensions == 0 || safe_num_dimensions > MAX_SAFE_DIMENSIONS) { - ORT_THROW("Invalid number of dimensions provided."); - } - try { - value.dimensions.resize(safe_num_dimensions); - } catch (const std::bad_alloc&) { - ORT_THROW("Error: Memory allocation failed 
while resizing dimensions."); - } - - for (auto& dim : value.dimensions) { - stream >> dim; - } - stream >> value.element_type; - metadata.emplace(key, value); - } - } catch (const Exception& e) { - ORT_THROW("Error: Failed to read map data.", e.what()); - } catch (...) { - ORT_THROW("Error: Failed to read map data."); - } - - ORT_ENFORCE(metadata.size() == map_size, "Error: Inconsistent map data."); - - return stream; -} - namespace backend_utils { bool IsDebugEnabled() { @@ -402,7 +303,9 @@ void CreateOVTensors(const std::string& device_name, SharedContext::SharedWeights::Metadata::Map& metadata_map, SharedContext::SharedWeights::WeightsFile& weights) { for (auto& [key, value] : metadata_map) { - if (value.tensor) continue; + if (value.tensor) { + continue; + } // Get element data type auto onnx_element_type = (ONNX_NAMESPACE::TensorProto_DataType)value.element_type; @@ -414,7 +317,6 @@ void CreateOVTensors(const std::string& device_name, // Use remote tensors auto npu_context = OVCore::Get()->core.get_default_context("NPU").as(); auto&& remote_tensor = npu_context.create_l0_host_tensor(ov_elementType, value.dimensions, ov::intel_npu::TensorType::INPUT); - // Copy data to remote tensor weights.load_weights(value.data_offset, remote_tensor.get(), value.size); value.tensor = std::make_shared(remote_tensor); diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index 1314edd54e937..a23b4c3248295 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -11,6 +11,7 @@ #include #include #include "core/common/common.h" +#include "core/providers/shared_library/provider_api.h" #include "core/providers/openvino/ov_interface.h" namespace onnxruntime { @@ -25,6 +26,17 @@ class SharedContext : public WeakSingleton { public: SharedContext() : OVCore_(OVCore::Get()) {} struct SharedWeights { + struct Header { + uint32_t bin_version = 1; + uint64_t footer_offset = 0; 
+ } header_; + struct Footer { + uint64_t subgraph_offset; + size_t subgraph_length; + uint64_t metadata_offset; + size_t metadata_length; + } footer_; + struct Metadata { struct Key { std::string name; @@ -37,16 +49,37 @@ class SharedContext : public WeakSingleton { }; struct Value { std::string location; - unsigned int data_offset; - unsigned int size; + uint32_t data_offset; + uint32_t size; std::vector dimensions; std::int32_t element_type; std::shared_ptr tensor; }; using Map = std::unordered_map; - friend std::ostream& operator<<(std::ostream& right, const Metadata::Map& metadata); - friend std::istream& operator>>(std::istream& right, Metadata::Map& metadata); - }; + void writeMetadataToBinaryFile(SharedContext& shared_context, const Metadata::Map& metadata); + void readMetadataFromBinaryFile(SharedContext& shared_context, Metadata::Map& metadata); + } metadata_; + + struct SubgraphMetadata { + struct Key { + std::string name; + bool operator==(const Key&) const = default; + }; + struct Hash { + std::size_t operator()(const Key& key) const noexcept { + return std::hash()(key.name); + } + }; + struct Value { + uint64_t epctx_offset; + size_t epctx_length; + }; + using Map = std::unordered_map; + void writeSubgraphDataToBinaryFile(SharedContext& shared_context, + const SubgraphMetadata::Map& subgraph_metadata); + void readSubgraphDataFromBinaryFile(SharedContext& shared_context, + SubgraphMetadata::Map& subgraph_metadata); + } subgraph_metadata_; struct WeightsFile { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(WeightsFile); @@ -60,9 +93,54 @@ class SharedContext : public WeakSingleton { size_t weights_size_; }; + struct SharedBinFile { + fs::path shared_bin_filename; + std::fstream bin_file_; + size_t bin_size_; + + SharedBinFile() = default; // Default constructor + ~SharedBinFile() { + if (bin_file_.is_open()) { + bin_file_.close(); // Close file when object is destroyed + } + } + + void openBinFile(const fs::path shared_bin_filename) { + // Check if the file 
exists before trying to open + if (!fs::exists(shared_bin_filename)) { + std::ofstream createFile(shared_bin_filename, std::ios::binary); // Create an empty binary file + if (!createFile) { + ORT_THROW("Failed to create the shared bin file!"); + } + createFile.close(); + } + + // Check if the file is accessible for reading and writing + fs::perms file_perms = fs::status(shared_bin_filename).permissions(); + + if ((file_perms & fs::perms::owner_read) == fs::perms::none || + (file_perms & fs::perms::owner_write) == fs::perms::none) { + ORT_THROW("Failed to open shared bin file! Insufficient permissions for file " + shared_bin_filename + "."); + } + + if (!bin_file_.is_open()) { // Prevent reopening + bin_file_.open(shared_bin_filename, std::ios::in | std::ios::out | std::ios::binary); + bin_size_ = bin_file_.seekg(0, std::ios::end).tellg(); + bin_file_.seekg(0, std::ios::beg); // Reset to the beginning of the file + + if (!bin_file_) { + ORT_THROW("Failed to open shared bin file!"); + } + } + } + void readBinFile(SharedContext& shared_context_); + void dumpBinFile(SharedContext& shared_context_); + } shared_bin_file; + fs::path external_weight_filename; std::unique_ptr mapped_weights; Metadata::Map metadata; + SubgraphMetadata::Map subgraph_metadata; } shared_weights; }; diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc index 7bd4f8d96cc55..31ba0f24780c7 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc @@ -99,7 +99,10 @@ Status EPCtxHandler::AddOVEPCtxNodeToGraph(const GraphViewer& graph_viewer, return Status::OK(); } -std::unique_ptr EPCtxHandler::GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const { +std::unique_ptr EPCtxHandler::GetModelBlobStream(SharedContext& shared_context_, + const std::filesystem::path& so_context_file_path, + 
const std::string& subgraph_name, + const GraphViewer& graph_viewer) const { auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin(); auto node = graph_viewer.GetNode(first_index); ORT_ENFORCE(node != nullptr); @@ -121,7 +124,32 @@ std::unique_ptr EPCtxHandler::GetModelBlobStream(const std::filesy } blob_filepath = blob_filepath.parent_path() / ep_cache_context; ORT_ENFORCE(std::filesystem::exists(blob_filepath), "Blob file not found: ", blob_filepath.string()); - result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in)); + if (blob_filepath == shared_context_.shared_weights.shared_bin_file.shared_bin_filename) { + LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from Shared bin file - " << blob_filepath; + auto& sb = shared_context_.shared_weights.shared_bin_file; + // check if size of bin file is greater than the header as it gets written at the begining + ORT_ENFORCE(sb.bin_size_ > 8, " Bin file is empty. Regenerate the epctx model. Bin file path : ", blob_filepath.string()); + auto subgraph_metadata = shared_context_.shared_weights.subgraph_metadata; + using Key = SharedContext::SharedWeights::SubgraphMetadata::Key; + const auto subgraph_key = Key{subgraph_name}; + auto it = subgraph_metadata.find(subgraph_key); + if (it != subgraph_metadata.end()) { + auto& value = it->second; + if (value.epctx_offset < sb.bin_size_ && value.epctx_length <= sb.bin_size_ && + (value.epctx_offset <= sb.bin_size_ - value.epctx_length)) { + sb.bin_file_.seekg(value.epctx_offset); // Move to the specified offset + std::string buffer(value.epctx_length, '\0'); // preallocate space + sb.bin_file_.read(&buffer[0], value.epctx_length); // Read the specified length + // Adjust string size in case of a short read + buffer.resize(sb.bin_file_.gcount()); + result.reset((std::istream*)new std::istringstream(buffer)); + } + } + ORT_ENFORCE(result != nullptr, " Epctx blob is not read. 
Check bin file correctness from Bin path: ", + blob_filepath.string()); + } else { + result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in)); + } } LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node"; return result; diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h index ff978bd6534d8..b840b5d2a4f13 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h @@ -6,8 +6,10 @@ #include #include #include +#include #include "core/providers/shared_library/provider_api.h" +#include "core/providers/openvino/contexts.h" namespace onnxruntime { namespace openvino_ep { @@ -31,7 +33,10 @@ class EPCtxHandler { const std::string& graph_name, const bool embed_mode, std::string&& model_blob_str) const; - std::unique_ptr GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const; + std::unique_ptr GetModelBlobStream(SharedContext& shared_context_, + const std::filesystem::path& so_context_file_path, + const std::string& subgraph_name, + const GraphViewer& graph_viewer) const; InlinedVector GetEPCtxNodes() const; private: @@ -39,6 +44,5 @@ class EPCtxHandler { std::unique_ptr epctx_model_; const logging::Logger& logger_; }; - } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index f9d4ab13cf2ce..714ab76fc2108 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -93,26 +93,49 @@ common::Status OpenVINOExecutionProvider::Compile( std::vector& node_compute_funcs) { auto& logger = *GetLogger(); Status status = Status::OK(); - + auto& sb = 
shared_context_->shared_weights.shared_bin_file; if (!fused_nodes.empty()) { // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get(); session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string(); session_context_.onnx_opset_version = graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain); + try { + if (session_context_.so_share_ep_contexts) { + if (session_context_.so_context_file_path.empty()) { + sb.shared_bin_filename = session_context_.onnx_model_path_name.parent_path() / "metadata.bin"; + } else { + sb.shared_bin_filename = session_context_.so_context_file_path.parent_path() / "metadata.bin"; + } + sb.openBinFile(sb.shared_bin_filename); + } + } catch (std::string msg) { + ORT_THROW(msg); + } } - // Temporary code to read metadata before it moves to the .bin - auto& metadata = shared_context_->shared_weights.metadata; - if (session_context_.so_share_ep_contexts && metadata.empty()) { - // Metadata is always read from model location, this could be a source or epctx model - fs::path metadata_filename = session_context_.onnx_model_path_name.parent_path() / "metadata.bin"; - std::ifstream file(metadata_filename, std::ios::binary); - if (file) { - file >> metadata; + // Read the contents of bin file into the shared structs if exists + try{ + auto& subgraph_metadata = shared_context_->shared_weights.subgraph_metadata; + auto& metadata = shared_context_->shared_weights.metadata; + if (session_context_.so_share_ep_contexts) { + shared_context_->shared_weights.shared_bin_file.readBinFile(*shared_context_); } - } + auto& header = shared_context_->shared_weights.header_; + auto& footer = shared_context_->shared_weights.footer_; + + if (sb.bin_file_.is_open()) { + sb.bin_file_.seekp(0, std::ios::beg); + sb.bin_file_.write(reinterpret_cast(&header), sizeof(SharedContext::SharedWeights::Header)); + } + // move the file ptr to the 
subgraph offset that can help in locating the epctx blobs + if (!subgraph_metadata.empty() && footer.subgraph_offset > sizeof(SharedContext::SharedWeights::Header)) { + sb.bin_file_.seekp(footer.subgraph_offset, std::ios::beg); + } + } catch (std::string msg) { + ORT_THROW(msg); + } struct OpenVINOEPFunctionState { AllocateFunc allocate_func = nullptr; DestroyFunc destroy_func = nullptr; @@ -174,21 +197,7 @@ common::Status OpenVINOExecutionProvider::Compile( } if (session_context_.so_share_ep_contexts) { - fs::path metadata_filename; - if (session_context_.so_context_file_path.empty()) { - metadata_filename = session_context_.onnx_model_path_name.parent_path() / "metadata.bin"; - } else { - metadata_filename = session_context_.so_context_file_path.parent_path() / "metadata.bin"; - } - - // Metadata is generated only for shared contexts - // If saving metadata then save it to the provided path or ose the original model path - // Multiple calls to Compile() will update the metadata and for the last call - // the resulting file will contain the aggregated content - std::ofstream file(metadata_filename, std::ios::binary); - if (file) { - file << metadata; - } + shared_context_->shared_weights.shared_bin_file.dumpBinFile(*shared_context_); } return status; diff --git a/onnxruntime/core/providers/openvino/openvino_file_handling.cc b/onnxruntime/core/providers/openvino/openvino_file_handling.cc new file mode 100644 index 0000000000000..539df89522a32 --- /dev/null +++ b/onnxruntime/core/providers/openvino/openvino_file_handling.cc @@ -0,0 +1,226 @@ +#include +#include +#include +// #include +#include +#include +// #include + +#include "core/providers/openvino/contexts.h" + +using Exception = ov::Exception; + +namespace onnxruntime { +namespace openvino_ep { + +SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename) : file_(filename, std::ios::in | std::ios::binary) { + try { + file_.exceptions(std::ifstream::failbit | std::ifstream::badbit); 
+ weights_size_ = file_.seekg(0, std::ios::end).tellg(); + } catch (std::ifstream::failure& e) { + ORT_THROW("Error: Failed to open weight file at ", filename.string(), " ", e.what()); + } +} + +void SharedContext::SharedWeights::WeightsFile::load_weights(size_t file_offset, void* data, size_t size) { + ORT_ENFORCE(file_offset < weights_size_ && size <= weights_size_ && (file_offset <= weights_size_ - size), "Error: File offset is out of bounds."); + file_.seekg(file_offset); + file_.read(reinterpret_cast(data), size); +} + +// Utility function to write a string in bin file +void writeString(std::fstream& file, const std::string& str) { + size_t length = str.size(); + file.write(reinterpret_cast(&length), sizeof(length)); + file.write(str.c_str(), length); +} + +// Utility function to write a vector in bin file +void writeVector(std::fstream& file, const std::vector& vec) { + size_t length = vec.size(); + file.write(reinterpret_cast(&length), sizeof(length)); + file.write(reinterpret_cast(vec.data()), length * sizeof(size_t)); +} + +// Write the entire metadata map to a binary file +void SharedContext::SharedWeights::Metadata::writeMetadataToBinaryFile(SharedContext& shared_context, + const SharedContext::SharedWeights::Metadata::Map& metadata) { + auto& file = shared_context.shared_weights.shared_bin_file.bin_file_; + if (!file.is_open()) { + ORT_THROW("Error opening shared bin file for writing weight as inputs metadata!") + } + + try { + size_t metadataSize = metadata.size(); + file.write(reinterpret_cast(&metadataSize), sizeof(metadataSize)); // Write map size + + for (const auto& [key, value] : metadata) { + writeString(file, key.name); + writeString(file, value.location); + file.write(reinterpret_cast(&value.data_offset), sizeof(value.data_offset)); + file.write(reinterpret_cast(&value.size), sizeof(value.size)); + file.write(reinterpret_cast(&value.element_type), sizeof(value.element_type)); + writeVector(file, value.dimensions); + } + } catch (const 
Exception& e) { + ORT_THROW("Error: Failed to write map data.", e.what()); + } catch (...) { + ORT_THROW("Error: Failed to write map data."); + } +} + +// Write the entire subgraph metadata map to a binary file +void SharedContext::SharedWeights::SubgraphMetadata::writeSubgraphDataToBinaryFile(SharedContext& shared_context, + const SharedContext::SharedWeights::SubgraphMetadata::Map& subgraph_metadata) { + auto& file = shared_context.shared_weights.shared_bin_file.bin_file_; + if (!file.is_open()) { + ORT_THROW("Error opening shared bin file for writing subgraph metadata!"); + } + try { + size_t subgraph_metadataSize = subgraph_metadata.size(); + file.write(reinterpret_cast(&subgraph_metadataSize), sizeof(subgraph_metadataSize)); // Write map size + + for (const auto& [key, value] : subgraph_metadata) { + writeString(file, key.name); + file.write(reinterpret_cast(&value.epctx_offset), sizeof(value.epctx_offset)); + file.write(reinterpret_cast(&value.epctx_length), sizeof(value.epctx_length)); + } + + } catch (const Exception& e) { + ORT_THROW("Error: Failed to write map data.", e.what()); + } catch (...) 
// Returns the number of bytes between the current get position of `file` and
// its end, or 0 when the position cannot be determined. The get position is
// restored before returning.
static size_t remainingBytes(std::fstream& file) {
  const std::fstream::pos_type invalid(-1);
  const std::fstream::pos_type here = file.tellg();
  if (here == invalid) {
    return 0;
  }
  file.seekg(0, std::ios::end);
  const std::fstream::pos_type end = file.tellg();
  file.seekg(here);
  if (end == invalid || end < here) {
    return 0;
  }
  return static_cast<size_t>(end - here);
}

// Utility function to read a string stored as a native-width size_t byte
// count followed by that many raw characters.
//
// The count comes from the file, so it is checked against the bytes actually
// left before any buffer is sized from it. On a failed or implausible count
// the stream's failbit is set and an empty string is returned.
std::string readString(std::fstream& file) {
  size_t length = 0;
  file.read(reinterpret_cast<char*>(&length), sizeof(length));  // Read string size
  if (!file || length > remainingBytes(file)) {
    file.setstate(std::ios::failbit);
    return {};
  }
  std::string str(length, '\0');
  file.read(&str[0], static_cast<std::streamsize>(length));  // Read string content
  return str;
}

// Utility function to read a vector stored as a native-width size_t element
// count followed by the elements' raw bytes.
//
// The count is validated the same way as in readString. The comparison
// divides instead of multiplying so an oversized count cannot overflow.
std::vector<size_t> readVector(std::fstream& file) {
  size_t length = 0;
  file.read(reinterpret_cast<char*>(&length), sizeof(length));  // Read vector size
  if (!file || length > remainingBytes(file) / sizeof(size_t)) {
    file.setstate(std::ios::failbit);
    return {};
  }
  std::vector<size_t> vec(length);
  file.read(reinterpret_cast<char*>(vec.data()),
            static_cast<std::streamsize>(length * sizeof(size_t)));  // Read vector elements
  return vec;
}
auto& file = shared_context.shared_weights.shared_bin_file.bin_file_; + if (!file) { + ORT_THROW("Error opening shared bin file for reading subgraph metadata!"); + } + + size_t subgraph_metadata_mapSize; + file.read(reinterpret_cast(&subgraph_metadata_mapSize), sizeof(subgraph_metadata_mapSize)); // Read map size + for (size_t i = 0; i < subgraph_metadata_mapSize; ++i) { + SharedContext::SharedWeights::SubgraphMetadata::Key key; + SharedContext::SharedWeights::SubgraphMetadata::Value value; + + key.name = readString(file); // Read key (name) + file.read(reinterpret_cast(&value.epctx_offset), sizeof(value.epctx_offset)); + file.read(reinterpret_cast(&value.epctx_length), sizeof(value.epctx_length)); + subgraph_metadata[key] = value; + } +} + +void SharedContext::SharedWeights::SharedBinFile::readBinFile(SharedContext& shared_context_) { + auto& header = shared_context_.shared_weights.header_; + auto& footer = shared_context_.shared_weights.footer_; + auto& subgraph_metadata_map = shared_context_.shared_weights.subgraph_metadata; + auto& metadata_map = shared_context_.shared_weights.metadata; + auto& sb = shared_context_.shared_weights.shared_bin_file; + try { + if (sb.bin_file_.is_open()) { + auto header_size = sizeof(SharedContext::SharedWeights::Header); + if (sb.bin_size_ > header_size) { + sb.bin_file_.read(reinterpret_cast(&header), header_size); + } + auto footer_size = sizeof(SharedContext::SharedWeights::Footer); + if (header.footer_offset < sb.bin_size_ && footer_size <= sb.bin_size_ && + (header.footer_offset <= sb.bin_size_ - footer_size)) { + sb.bin_file_.seekp(header.footer_offset, std::ios::beg); + sb.bin_file_.read(reinterpret_cast(&footer), footer_size); + } + + if (footer.subgraph_offset < sb.bin_size_ && footer.subgraph_length <= sb.bin_size_ && + (footer.subgraph_offset <= sb.bin_size_ - footer.subgraph_length)) { + sb.bin_file_.seekp(footer.subgraph_offset, std::ios::beg); + 
shared_context_.shared_weights.subgraph_metadata_.readSubgraphDataFromBinaryFile(shared_context_, subgraph_metadata_map); + } + if (footer.metadata_offset < sb.bin_size_ && footer.metadata_length <= sb.bin_size_ && + (footer.metadata_offset <= sb.bin_size_ - footer.metadata_length)) { + sb.bin_file_.seekp(footer.metadata_offset, std::ios::beg); + shared_context_.shared_weights.metadata_.readMetadataFromBinaryFile(shared_context_, metadata_map); + } + } + } catch (std::string msg) { + ORT_THROW(msg); + } +} + +void SharedContext::SharedWeights::SharedBinFile::dumpBinFile(SharedContext& shared_context_) { + auto& header = shared_context_.shared_weights.header_; + auto& footer = shared_context_.shared_weights.footer_; + auto& subgraph_metadata_map = shared_context_.shared_weights.subgraph_metadata; + auto& metadata_map = shared_context_.shared_weights.metadata; + auto& sb = shared_context_.shared_weights.shared_bin_file; + auto& bin_file = sb.bin_file_; + try { + if (bin_file.is_open()) { + footer.subgraph_offset = static_cast(bin_file.tellp()); + shared_context_->shared_weights.subgraph_metadata_.writeSubgraphDataToBinaryFile(*shared_context_, subgraph_metadata); + footer.metadata_offset = static_cast(bin_file.tellp()); + footer.subgraph_length = static_cast(footer.metadata_offset - footer.subgraph_offset); + shared_context_->shared_weights.metadata_.writeMetadataToBinaryFile(*shared_context_, metadata); + header.footer_offset = static_cast(bin_file.tellp()); + footer.metadata_length = static_cast(header.footer_offset - footer.metadata_offset); + + // Write footer to the bin file + bin_file.write(reinterpret_cast(&footer), sizeof(SharedContext::SharedWeights::Footer)); + // Update header with Footer offset at the end + bin_file.seekp(0, std::ios::beg); + bin_file.write(reinterpret_cast(&header), sizeof(SharedContext::SharedWeights::Header)); + bin_file.close(); + } + } catch (std::string msg) { + ORT_THROW(msg); + } +} +} // namespace openvino_ep +} // namespace 
onnxruntime diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index 6afbd8ce761e5..d5d78d24aab8e 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -132,7 +132,9 @@ OVExeNetwork OVCore::ImportModel(std::istream& model_stream, #endif OVExeNetwork exe(obj); return exe; - } catch (const Exception& e) { + } catch (const ov::Exception& e) { // Catch OpenVINO-specific exceptions + ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); + } catch (const std::exception& e) { ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); } catch (...) { ORT_THROW(log_tag + " Exception while Loading Network for graph " + name);