From 27de00ac518fedb62f30a24f41de411aa3e0b9e6 Mon Sep 17 00:00:00 2001 From: Preetha Veeramalai Date: Thu, 13 Feb 2025 05:02:02 -0800 Subject: [PATCH 1/5] Add support for optimized bin file creation --- .../providers/openvino/backend_manager.cc | 16 ++++- .../core/providers/openvino/backend_utils.cc | 48 +++++++++++++ .../core/providers/openvino/contexts.h | 69 ++++++++++++++++++- .../openvino/openvino_execution_provider.cc | 27 +++++++- 4 files changed, 153 insertions(+), 7 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 215cfafb2d174..f10ef6917873a 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -215,7 +215,21 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie // If not embed_mode, dump the blob here and only pass on the path to the blob std::string model_blob_str; auto compiled_model = concrete_backend_->GetOVCompiledModel(); - if (session_context_.so_context_embed_mode) { // Internal blob + if (session_context_.so_share_ep_contexts){ + std::ostringstream model_blob_stream; + compiled_model.export_model(model_blob_stream); + + // std::ofstream file(metadata_filename, std::ios::app| std::ios::binary); + // std::cout << " write to metadata bin - " << metadata_filename << std::endl; + auto& bin_file = shared_context_.shared_weights.shared_bin_file.bin_file_; + if (bin_file.is_open()) { + bin_file << model_blob_stream.str(); + } + std::cout << "Current offset after "<< subgraph_context_.subgraph_name << " = " << bin_file.tellp() << std::endl; + + model_blob_str = shared_context_.shared_weights.shared_bin_file.shared_bin_filename.filename().string(); + } else if (session_context_.so_context_embed_mode) { + // Internal blob std::ostringstream model_blob_stream; compiled_model.export_model(model_blob_stream); model_blob_str = std::move(model_blob_stream).str(); diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 2ee5e9ec3e3a9..ea2c1ad9fc163 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -69,6 +69,30 @@ std::ostream& operator<<(std::ostream& stream, const SharedContext::SharedWeight return stream; } +std::ostream& operator<<(std::ostream& stream, + const SharedContext::SharedWeights::SubgraphMetadata::Map& subgraph_metadata) { + try { + stream << subgraph_metadata.size(); + + // Write each key-value pair + // Put elements in separate lines to facilitate reading + for (const auto& [key, value] : subgraph_metadata) { + stream << std::endl + << key.name; + stream << std::endl + << value.epctx_offset; + stream << std::endl + << value.epctx_length; + } + } catch (const Exception& e) { + ORT_THROW("Error: Failed to write subgraph map data.", e.what()); + } catch (...) { + ORT_THROW("Error: Failed to write subgraph map data."); + } + ORT_ENFORCE(stream.good(), "Error: Failed to write subgraph map data."); + return stream; +} + std::istream& operator>>(std::istream& stream, SharedContext::SharedWeights::Metadata::Map& metadata) { size_t map_size{0}; try { @@ -117,6 +141,30 @@ std::istream& operator>>(std::istream& stream, SharedContext::SharedWeights::Met return stream; } +std::istream& operator>>(std::istream& stream, SharedContext::SharedWeights::SubgraphMetadata::Map& subgraph_metadata) { + size_t map_size{0}; + try { + stream >> map_size; + + while (!stream.eof()) { + SharedContext::SharedWeights::SubgraphMetadata::Key key; + SharedContext::SharedWeights::SubgraphMetadata::Value value; + stream >> key.name; + stream >> value.epctx_offset; + stream >> value.epctx_length; + + subgraph_metadata.emplace(key, value); + } + } catch (const Exception& e) { + ORT_THROW("Error: Failed to read map data.", e.what()); + } catch (...) { + ORT_THROW("Error: Failed to read map data."); + } + + ORT_ENFORCE(subgraph_metadata.size() == map_size, "Error: Inconsistent map data."); + + return stream; +} namespace backend_utils { diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index 1314edd54e937..f8d3e52bf077e 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -25,6 +25,22 @@ class SharedContext : public WeakSingleton { public: SharedContext() : OVCore_(OVCore::Get()) {} struct SharedWeights { + struct Header { + uint32_t bin_version=1; + uint32_t footer_offset; + Header(uint32_t bin_in, uint32_t footer_in) : + bin_version(bin_in), footer_offset(footer_in){} + }; + struct Footer { + uint32_t subgraph_offset; + uint32_t subgraph_length; + uint32_t metadata_offset; + uint32_t metadata_length; + Footer(uint32_t subgraph_offset_in, uint32_t subgraph_length_in, + uint32_t metadata_offset_in, uint32_t metadata_length_in) : + subgraph_offset(subgraph_offset_in), subgraph_length(subgraph_length_in), + metadata_offset(metadata_offset_in), metadata_length(metadata_length_in) {} + }; struct Metadata { struct Key { std::string name; @@ -37,8 +53,8 @@ class SharedContext : public WeakSingleton { }; struct Value { std::string location; - unsigned int data_offset; - unsigned int size; + uint32_t data_offset; + uint32_t size; std::vector dimensions; std::int32_t element_type; std::shared_ptr tensor; @@ -48,6 +64,25 @@ class SharedContext : public WeakSingleton { friend std::istream& operator>>(std::istream& right, Metadata::Map& metadata); }; + struct SubgraphMetadata { + struct Key { + std::string name; + bool operator==(const Key&) const = default; + }; + struct Hash { + std::size_t operator()(const Key& key) const noexcept { + return std::hash()(key.name); + } + }; + struct Value { + uint32_t epctx_offset; + uint32_t epctx_length; + }; + using Map = std::unordered_map; + friend std::ostream& operator<<(std::ostream& right, const SubgraphMetadata::Map& subgraph_metadata); + friend std::istream& operator>>(std::istream& right, SubgraphMetadata::Map& subgraph_metadata); + }; + struct WeightsFile { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(WeightsFile); WeightsFile() = delete; @@ -60,10 +95,38 @@ class SharedContext : public WeakSingleton { size_t weights_size_; }; + struct SharedBinFile { + // ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SharedBinFile); + // SharedBinFile() = delete; + // SharedBinFile(fs::path shared_bin_filename) : + // bin_file_(shared_bin_filename, std::ios::out | std::ios::app| std::ios::binary) { + // if(bin_file_.is_open()) + // std::cout << " Bin file opened " << std::endl; + // } + fs::path shared_bin_filename; + std::ofstream bin_file_; + + SharedBinFile() = default; // Default constructor + ~SharedBinFile() = default; // Prevent closing the file automatically + + void openBinFile(fs::path shared_bin_filename) { + if (!bin_file_.is_open()) { // Prevent reopening + bin_file_.open(shared_bin_filename, std::ios::out | std::ios::app | std::ios::binary); + if (!bin_file_) { + throw std::runtime_error("Failed to open log file!"); + } + } + } + }shared_bin_file; + fs::path external_weight_filename; std::unique_ptr mapped_weights; + std::unique_ptr
header_; + std::unique_ptr