Reshape feature implementation #573

Draft: wants to merge 3 commits into base: ovep-develop
34 changes: 34 additions & 0 deletions .github/workflows/internal_ci.yml
@@ -0,0 +1,34 @@
name: Internal CI

on:
pull_request:
branches:
- '**' # Triggers on a PR to any branch

jobs:
build:

runs-on: [self-hosted, Linux, X64] # Runs on a Lunar Lake self-hosted runner
env:
BUILD_SOURCESDIRECTORY: ${{ github.workspace }}
BUILD_BINARIESDIRECTORY: ${{ github.workspace }}/build
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.ref }} # check out the PR branch

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10'

- name: Create build directory
run: |
mkdir -p ${{ env.BUILD_BINARIESDIRECTORY }}
chmod -R 777 ${{ env.BUILD_BINARIESDIRECTORY }}
- name: Running Internal CI # Trigger Internal CI on the PR branch
run: |
cd tools/ci_build/github/linux/
dir
./run_dockerbuild.sh -o ubuntu22.04 -p 3.10 -d openvino -v 2024.5.0 -x "--config Release --use_openvino CPU --build_wheel --build_shared_lib --parallel "
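For readers unfamiliar with the script, a plausible reading of the run_dockerbuild.sh flags above, assumed from the script's usual conventions rather than verified here: -o selects the Ubuntu 22.04 base image, -p the Python version, -d the device/EP flavor (openvino), -v the OpenVINO version to install, and -x forwards the extra arguments (Release config, CPU device, wheel and shared-lib builds) to build.py.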
39 changes: 38 additions & 1 deletion onnxruntime/core/providers/openvino/backend_manager.cc
@@ -61,6 +61,10 @@
return "";
}(subgraph);

if (!session_context_.shape.empty()) {
ValidateInputShapes(session_context_.shape, subgraph.GetInputs());
}

// Save the indexes of graph inputs among fused_node's inputDefs
// (which also contains initializers).
for (uint32_t index = 0; const auto& node : subgraph.GetInputs()) {
@@ -100,7 +104,7 @@
}
}

if (ModelHasSymbolicInputDims(subgraph)) {
if (ModelHasSymbolicInputDims(subgraph) && session_context_.shape.empty()) {
subgraph_context_.has_dynamic_input_shape = true;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
if ((session_context_.device_type.find("CPU") != std::string::npos ||
@@ -308,6 +312,39 @@
return has_sym_dims;
}

Check notice on line 315 in onnxruntime/core/providers/openvino/backend_manager.cc (GitHub Actions / cpplint): Add #include <map> for map<> [build/include_what_you_use] [4]

void BackendManager::ValidateInputShapes(const std::map<std::string, ov::PartialShape>& shape,
const std::vector<const NodeArg*>& graph_inputs) const {
for (const auto& [tensor_name, requested_shape] : shape) {
// Find matching input in graph
const NodeArg* graph_input = nullptr;
for (const auto* input : graph_inputs) {
if (input->Name() == tensor_name) {
graph_input = input;
break;
}
}

if (!graph_input) {
ORT_THROW("Input " + tensor_name + "specified in reshape_input does not exist");
}

const ONNX_NAMESPACE::TensorShapeProto* graph_shape = graph_input->Shape();
if (!graph_shape) {
ORT_THROW("Graph input" + tensor_name + "has no shape information");
}

// Check dimensions count matches
size_t graph_dim_count = graph_shape->dim_size();
size_t requested_dim_count = requested_shape.get_max_shape().size();
if (graph_dim_count != requested_dim_count) {
ORT_THROW("Dimensions mismatched for input" + tensor_name +
": graph expects " + std::to_string(graph_dim_count) +
" dimensions but reshape_input specifies " +
std::to_string(requested_dim_count) + " dimensions");
}
}
}

// Check to see if the graph is QDQ
static bool IsQDQGraph(const onnxruntime::GraphViewer& graph_viewer) {
std::unordered_set<std::string> qdq_ops = {"QuantizeLinear", "DequantizeLinear"};
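To make the accepted and rejected cases concrete, here is a minimal standalone sketch of the rank comparison ValidateInputShapes performs; the tensor ranks are hypothetical, and the helper below is illustrative, not part of the PR.

#include <cstddef>
#include <openvino/core/partial_shape.hpp>

// Illustrative helper mirroring the rank check above.
static bool RankMatches(const ov::PartialShape& requested, size_t graph_rank) {
  // Same comparison as the EP: number of requested dims vs. dims declared by the graph.
  return requested.get_max_shape().size() == graph_rank;
}

// RankMatches(ov::PartialShape{1, ov::Dimension(1, 128)}, 2) -> true:  accepted
// RankMatches(ov::PartialShape{1, ov::Dimension(1, 128)}, 3) -> false: the EP would throw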
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/backend_manager.h
@@ -39,6 +39,8 @@ class BackendManager {

bool ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& subgraph) const;
bool ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const;
void ValidateInputShapes(const shape_t& shape,
const std::vector<const NodeArg*>& graph_inputs) const;

std::shared_ptr<ONNX_NAMESPACE::ModelProto>
ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_proto);
5 changes: 5 additions & 0 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -146,6 +146,11 @@ CreateOVModel(const std::string model,
try {
auto ov_model = OVCore::ReadModel(model, session_context.onnx_model_path_name.string());

if (!session_context.shape.empty()) {
LOGS_DEFAULT(INFO) << log_tag << "Reshaping the OV model inputs to the user-specified shapes";
ov_model->reshape(session_context.shape);
}

// Check for Constant Folding
if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
ov::pass::ConstantFolding pass_const_obj;
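For context, ov::Model::reshape has an overload taking a map from input name to ov::PartialShape, which is exactly what session_context.shape carries. A minimal sketch with placeholder model and tensor names:

#include <map>
#include <memory>
#include <string>
#include <openvino/openvino.hpp>

int main() {
  ov::Core core;
  // "model.onnx" and "input_ids" are placeholders, not names from this PR.
  std::shared_ptr<ov::Model> ov_model = core.read_model("model.onnx");
  std::map<std::string, ov::PartialShape> shapes;
  // Fix the batch dimension to 1; bound the sequence dimension to [1, 128].
  shapes["input_ids"] = ov::PartialShape{1, ov::Dimension(1, 128)};
  ov_model->reshape(shapes);
  return 0;
}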
53 changes: 48 additions & 5 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -96,6 +96,7 @@
} else if (!session_context_.has_external_weights &&
!subgraph_context_.has_dynamic_input_shape &&
!session_context_.so_context_enable &&
session_context_.shape.empty() &&
auto_unified_compile) {
// Unified OV compile_model is efficient when ov model caching is enabled
// Unified OV compile_model API is supported with AUTO from version 2024.3 and above
@@ -418,9 +419,20 @@
(it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
ov_tensor_data_t ov_tensor_data;
const auto& input = ov_input_info.at(input_idx);
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));

if (!session_context_.shape.empty()) {
ov::PartialShape partial_shape = input.get_partial_shape();
const auto& ort_dims = tensor.GetTensorTypeAndShapeInfo().GetShape();
ValidateOrtDimsAgainstPartialShape(ort_dims, partial_shape);
ov::Shape concrete_shape;
for (size_t i = 0; i < ort_dims.size(); ++i) {
concrete_shape.push_back(ort_dims[i]);
}
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), concrete_shape,
const_cast<void*>(tensor.GetTensorRawData()));
} else {
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));
}
ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

Expand All @@ -434,6 +446,10 @@
}
} // Loop subgraph original input names

if (!session_context_.shape.empty()) {
infer_request->Infer();
}

if (session_context_.device_type.find("NPU") != std::string::npos) {
// Set the output blob as remote blob
auto graph_output_info = exe_network_.Get().outputs();
@@ -465,8 +481,15 @@
ov_tensor_data_t ov_tensor_data;
const auto& output = graph_output_info.at(output_idx);
ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));

if (!session_context_.shape.empty()) {
ov::Tensor output_tensor = infer_request->GetOutputTensor(output_idx);
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output_tensor.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));
} else {
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));
}
ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

try {
@@ -669,6 +692,26 @@
}
}

Check notice on line 695 in onnxruntime/core/providers/openvino/backends/basic_backend.cc (GitHub Actions / cpplint): Add #include <vector> for vector<> [build/include_what_you_use] [4]

void BasicBackend::ValidateOrtDimsAgainstPartialShape(const std::vector<int64_t>& ort_dims,
const ov::PartialShape& partial_shape) const {
// Check if the number of dimensions matches
if (static_cast<int64_t>(ort_dims.size()) != partial_shape.rank().get_length()) {
ORT_THROW("Mismatch in number of dimensions between ORT tensor and OpenVINO PartialShape.");
}
// Validate each dimension
for (size_t i = 0; i < ort_dims.size(); ++i) {
const auto& ov_dim = partial_shape[i]; // OpenVINO dimension at index i
int64_t ort_dim = ort_dims[i]; // ORT dimension at index i

// Check if the ORT dimension is within the specified range
int64_t min_dim = ov_dim.get_min_length();
int64_t max_dim = ov_dim.get_max_length();
if (ort_dim < min_dim || ort_dim > max_dim) {
ORT_THROW("ORT dimension " + std::to_string(ort_dim) + " is out of the allowed range [" +
std::to_string(min_dim) + ", " + std::to_string(max_dim) + "]");
}
}
}

void BasicBackend::Infer(OrtKernelContext* ctx) {
// Preliminary Thread safety mechanism
// currently allows a maximum of 8 Infer request's to parallel execute at the same time
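A small sketch of the bounds semantics used above, with hypothetical values: an ov::Dimension constructed as (1, 128) accepts any concrete length in [1, 128], and get_min_length()/get_max_length() recover the bounds the EP checks against.

#include <cstdint>
#include <iostream>
#include <openvino/core/dimension.hpp>

int main() {
  ov::Dimension dim(1, 128);  // bounded dimension, as produced by a "1..128" range
  int64_t ort_dim = 64;       // hypothetical concrete length from the ORT tensor
  bool in_range = ort_dim >= dim.get_min_length() && ort_dim <= dim.get_max_length();
  std::cout << (in_range ? "accepted" : "rejected") << "\n";  // prints "accepted"
  return 0;
}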
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -51,6 +51,8 @@ class BasicBackend : public IBackend {
void EnableStreams();
void SetNumThreads(ov::AnyMap& device_config);
void StartAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
void ValidateOrtDimsAgainstPartialShape(const std::vector<int64_t>& ort_dims,
const ov::PartialShape& partial_shape) const;

#ifdef IO_BUFFER_ENABLED
void StartRemoteAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/contexts.h
@@ -61,6 +61,7 @@ struct SharedContext {
};

using config_t = std::map<std::string, ov::AnyMap>;
using shape_t = std::map<std::string, ov::PartialShape>;

struct ProviderInfo {
std::string device_type{""}; // [device_type]: Overrides the accelerator hardware type and
@@ -74,6 +75,7 @@
uint32_t num_of_threads{0}; // [num_of_threads]: Overrides the accelerator default value of
// number of threads with this value at runtime.
config_t load_config{}; // JSON config map to load custom OV parameters.
shape_t shape{}; // [reshape_input]: Maps input names to fixed or bounded shapes used to reshape the OV model.
fs::path cache_dir{""}; // [cache_dir]: specify the path to
// dump and load the blobs for the model caching/kernel caching
// (GPU) feature. If blob files are already present,
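A shape_t value is therefore just a name-to-PartialShape map. A sketch with hypothetical input names:

// Hypothetical shape_t for a two-input model: batch fixed at 1,
// second dimension bounded to [1, 128] for both inputs.
shape_t shape{
    {"input_ids", ov::PartialShape{1, ov::Dimension(1, 128)}},
    {"attention_mask", ov::PartialShape{1, ov::Dimension(1, 128)}}};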
93 changes: 92 additions & 1 deletion onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -148,7 +148,7 @@
<< "Update the 'device_type' to specified types 'CPU', 'GPU', 'GPU.0', "
<< "'GPU.1', 'NPU' or from"
<< " HETERO/MULTI/AUTO options and set 'precision' separately. \n";
int delimit = device_type.find("_");
auto delimit = device_type.find("_");
device_type = device_type.substr(0, delimit);
return device_type.substr(delimit + 1);
}
@@ -212,6 +212,97 @@

pi.precision = ParsePrecision(provider_options, pi.device_type, "precision");

if (provider_options.contains("reshape_input") && pi.device_type == "NPU") {
auto parse_input_shapes = [&](const std::string& reshape_input_definition) {
std::map<std::string, ov::PartialShape> parsed_shape_map;
std::string unparsed_definition = reshape_input_definition;

while (!unparsed_definition.empty()) {
// Find the next shape definition bracket
auto shape_start_bracket = unparsed_definition.find_first_of('[');
if (shape_start_bracket == std::string::npos) {
ORT_THROW("Malformed input: missing opening bracket '[' in: " + unparsed_definition);
}
// Extract the tensor name
std::string tensor_name = unparsed_definition.substr(0, shape_start_bracket);
// Remove leading/trailing whitespace
tensor_name.erase(0, tensor_name.find_first_not_of(" \t"));
tensor_name.erase(tensor_name.find_last_not_of(" \t") + 1);

if (tensor_name.empty()) {
ORT_THROW("Empty tensor name provided in rehsape_input parameter");
}

// Closing bracket for current shape definition
auto shape_end_bracket = unparsed_definition.find_first_of(']', shape_start_bracket);

if (shape_end_bracket == std::string::npos || shape_end_bracket < shape_start_bracket) {
ORT_THROW("Missing closing bracket ']' for tensor: " + tensor_name);
}

// Extract shape dimensions string
std::string shape_dimension_str = unparsed_definition.substr(shape_start_bracket + 1,
shape_end_bracket - shape_start_bracket - 1);
std::vector<ov::Dimension> dimension_values;
std::stringstream dimension_stream(shape_dimension_str);
std::string dimension_token;

while (std::getline(dimension_stream, dimension_token, ',')) {
// Remove leading/trailing whitespace
dimension_token.erase(0, dimension_token.find_first_not_of(" \t"));
dimension_token.erase(dimension_token.find_last_not_of(" \t") + 1);

// Check if dimension is a range
size_t range_separator_pos = dimension_token.find("..");
if (range_separator_pos != std::string::npos) {
std::string range_start_str = dimension_token.substr(0, range_separator_pos);
std::string range_end_str = dimension_token.substr(range_separator_pos + 2);

// Remove leading/trailing whitespace
range_start_str.erase(0, range_start_str.find_first_not_of(" \t"));
range_start_str.erase(range_start_str.find_last_not_of(" \t") + 1);
range_end_str.erase(0, range_end_str.find_first_not_of(" \t"));
range_end_str.erase(range_end_str.find_last_not_of(" \t") + 1);

if (range_start_str.empty() || range_end_str.empty() ||
!std::all_of(range_start_str.begin(), range_start_str.end(), ::isdigit) ||
!std::all_of(range_end_str.begin(), range_end_str.end(), ::isdigit)) {
ORT_THROW("Invalid dimension range format: " + dimension_token + " for tensor: " + tensor_name);
}

int range_start = std::stoi(range_start_str);
int range_end = std::stoi(range_end_str);

if (range_start > range_end) {
ORT_THROW("Invalid dimension range (start > end) for tensor: " + tensor_name);
}

dimension_values.emplace_back(ov::Dimension(range_start, range_end));
} else {
// Handle single dimension value
if (dimension_token.empty() ||
!std::all_of(dimension_token.begin(), dimension_token.end(), ::isdigit)) {
ORT_THROW("Invalid dimension value: " + dimension_token + " for tensor: " + tensor_name);
}
dimension_values.emplace_back(std::stoi(dimension_token));
}
}

// Store parsed shape in result map
parsed_shape_map[tensor_name] = ov::PartialShape(dimension_values);
// Update remaining unparsed string
unparsed_definition = unparsed_definition.substr(shape_end_bracket + 1);
if (!unparsed_definition.empty() && unparsed_definition.front() == ',') {
unparsed_definition = unparsed_definition.substr(1);
}
// Remove leading whitespace
unparsed_definition.erase(0, unparsed_definition.find_first_not_of(" \t"));
}
return parsed_shape_map;
};
pi.shape = parse_input_shapes(provider_options.at("reshape_input"));
}

if (provider_options.contains("load_config")) {
auto parse_config = [&](const std::string& config_str) -> std::map<std::string, ov::AnyMap> {
// If the config string is empty, return an empty map and skip processing
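Putting the parser together: reshape_input accepts one or more name[dims] entries separated by commas, where each dim is either a fixed integer or a min..max range. A hedged end-to-end sketch via the C++ API; AppendExecutionProvider_OpenVINO_V2 is the existing options-map entry point, while the model path and tensor names are placeholders:

#include <string>
#include <unordered_map>
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env;
  Ort::SessionOptions session_options;
  std::unordered_map<std::string, std::string> ov_options;
  ov_options["device_type"] = "NPU";  // this PR only parses reshape_input for NPU
  // Batch fixed at 1; sequence length bounded to [1, 128] for both inputs.
  ov_options["reshape_input"] = "input_ids[1,1..128],attention_mask[1,1..128]";
  session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
  Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);
  return 0;
}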
12 changes: 11 additions & 1 deletion onnxruntime/core/providers/openvino/ov_interface.cc
@@ -211,6 +211,16 @@ OVTensorPtr OVInferRequest::GetTensor(const std::string& input_name) {
}
}

OVTensor OVInferRequest::GetOutputTensor(const int& output_idx) {
try {
return ovInfReq.get_output_tensor(output_idx);
} catch (const Exception& e) {
ORT_THROW(log_tag + " Cannot access output tensor: " + e.what());
} catch (...) {
ORT_THROW(log_tag + " Cannot access output tensor");
}
}

std::string OVInferRequest::GetInputTensorName(uint32_t index) {
try {
const auto& model = ovInfReq.get_compiled_model();
@@ -233,7 +243,7 @@ void OVInferRequest::SetTensor(const std::string& name, OVTensorPtr& blob) {
}

uint32_t OVInferRequest::GetNumInputs() {
return ovInfReq.get_compiled_model().inputs().size();
return static_cast<uint32_t>(ovInfReq.get_compiled_model().inputs().size());
}

void OVInferRequest::StartAsync() {
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/ov_interface.h
@@ -91,6 +91,7 @@ class OVInferRequest {
OVTensorPtr GetTensor(const std::string& name);
std::string GetInputTensorName(uint32_t index);
void SetTensor(const std::string& name, OVTensorPtr& blob);
OVTensor GetOutputTensor(const int& output_idx);
void StartAsync();
void Infer();
void WaitRequest();
2 changes: 2 additions & 0 deletions onnxruntime/test/perftest/ort_test_session.cc
@@ -787,6 +787,8 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
}
} else if (key == "device_memory_name") {
device_memory_name_ = std::move(value);
} else if (key == "reshape_input") {
ov_options[key] = value;
} else {
ORT_THROW(
"[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO."
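With this pass-through in place, the option should be reachable from onnxruntime_perf_test via the usual key|value runtime-options flag, e.g. -e openvino -i "reshape_input|input_ids[1,1..128]" (the -i syntax is the perf-test convention for EP options; the tensor name here is a placeholder).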