Reshape feature implementation #573

Draft: wants to merge 3 commits into base: ovep-develop
34 changes: 34 additions & 0 deletions .github/workflows/internal_ci.yml
@@ -0,0 +1,34 @@
name: Internal CI

on:
pull_request:
branches:
- '**' # Triggers on a PR to any branch

jobs:
build:

runs-on: [self-hosted, Linux, X64] # Runs on a Lunar Lake self-hosted runner
env:
BUILD_SOURCESDIRECTORY: ${{ github.workspace }}
BUILD_BINARIESDIRECTORY: ${{ github.workspace }}/build
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.ref }} # check out the PR branch

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10'

- name: Create build directory
run: |
mkdir -p ${{ env.BUILD_BINARIESDIRECTORY }}
chmod -R 777 ${{ env.BUILD_BINARIESDIRECTORY }}
- name: Running Internal CI # Trigger Internal CI on the PR branch
run: |
cd tools/ci_build/github/linux/
dir
./run_dockerbuild.sh -o ubuntu22.04 -p 3.10 -d openvino -v 2024.5.0 -x "--config Release --use_openvino CPU --build_wheel --build_shared_lib --parallel "
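For readers unfamiliar with the script, a plausible reading of the run_dockerbuild.sh flags above, assumed from the script's usual conventions rather than verified here: -o selects the Ubuntu 22.04 base image, -p the Python version, -d the device/EP flavor (openvino), -v the OpenVINO version to install, and -x forwards the extra arguments (Release config, CPU device, wheel and shared-lib builds) to build.py.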
39 changes: 38 additions & 1 deletion onnxruntime/core/providers/openvino/backend_manager.cc
@@ -61,6 +61,10 @@
return "";
}(subgraph);

if (!session_context_.shape.empty()) {
ValidateInputShapes(session_context_.shape, subgraph.GetInputs());
}

// Save the indexes of graph inputs among fused_node's inputDefs
// (which also contains initializers).
for (uint32_t index = 0; const auto& node : subgraph.GetInputs()) {
@@ -100,7 +104,7 @@
}
}

if (ModelHasSymbolicInputDims(subgraph)) {
if (ModelHasSymbolicInputDims(subgraph) && session_context_.shape.empty()) {
subgraph_context_.has_dynamic_input_shape = true;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
if ((session_context_.device_type.find("CPU") != std::string::npos ||
@@ -308,6 +312,39 @@
return has_sym_dims;
}

Check notice on line 315 in onnxruntime/core/providers/openvino/backend_manager.cc (GitHub Actions / cpplint): Add #include <map> for map<> [build/include_what_you_use] [4]

void BackendManager::ValidateInputShapes(const std::map<std::string, ov::PartialShape>& shape,
const std::vector<const NodeArg*>& graph_inputs) const {
for (const auto& [tensor_name, requested_shape] : shape) {
// Find matching input in graph
const NodeArg* graph_input = nullptr;
for (const auto* input : graph_inputs) {
if (input->Name() == tensor_name) {
graph_input = input;
break;
}
}

if (!graph_input) {
ORT_THROW("Input " + tensor_name + "specified in reshape_input does not exist");
}

const ONNX_NAMESPACE::TensorShapeProto* graph_shape = graph_input->Shape();
if (!graph_shape) {
ORT_THROW("Graph input" + tensor_name + "has no shape information");
}

// Check dimensions count matches
size_t graph_dim_count = graph_shape->dim_size();
size_t requested_dim_count = requested_shape.get_max_shape().size();
if (graph_dim_count != requested_dim_count) {
ORT_THROW("Dimensions mismatched for input" + tensor_name +
": graph expects " + std::to_string(graph_dim_count) +
" dimensions but reshape_input specifies " +
std::to_string(requested_dim_count) + " dimensions");
}
}
}

// Check to see if the graph is QDQ
static bool IsQDQGraph(const onnxruntime::GraphViewer& graph_viewer) {
std::unordered_set<std::string> qdq_ops = {"QuantizeLinear", "DequantizeLinear"};
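To make the accepted and rejected cases concrete, here is a minimal standalone sketch of the rank comparison ValidateInputShapes performs; the tensor ranks are hypothetical, and the helper below is illustrative, not part of the PR.

#include <cstddef>
#include <openvino/core/partial_shape.hpp>

// Illustrative helper mirroring the rank check above.
static bool RankMatches(const ov::PartialShape& requested, size_t graph_rank) {
  // Same comparison as the EP: number of requested dims vs. dims declared by the graph.
  return requested.get_max_shape().size() == graph_rank;
}

// RankMatches(ov::PartialShape{1, ov::Dimension(1, 128)}, 2) -> true:  accepted
// RankMatches(ov::PartialShape{1, ov::Dimension(1, 128)}, 3) -> false: the EP would throw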
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/backend_manager.h
@@ -39,6 +39,8 @@ class BackendManager {

bool ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& subgraph) const;
bool ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const;
void ValidateInputShapes(const shape_t& shape,
const std::vector<const NodeArg*>& graph_inputs) const;

std::shared_ptr<ONNX_NAMESPACE::ModelProto>
ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_proto);
5 changes: 5 additions & 0 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -146,6 +146,11 @@ CreateOVModel(const std::string model,
try {
auto ov_model = OVCore::ReadModel(model, session_context.onnx_model_path_name.string());

if (!session_context.shape.empty()) {
LOGS_DEFAULT(INFO) << log_tag << "Reshaping the OV model inputs to the user-specified shapes";
ov_model->reshape(session_context.shape);
}

// Check for Constant Folding
if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
ov::pass::ConstantFolding pass_const_obj;
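For context, ov::Model::reshape has an overload taking a map from input name to ov::PartialShape, which is exactly what session_context.shape carries. A minimal sketch with placeholder model and tensor names:

#include <map>
#include <memory>
#include <string>
#include <openvino/openvino.hpp>

int main() {
  ov::Core core;
  // "model.onnx" and "input_ids" are placeholders, not names from this PR.
  std::shared_ptr<ov::Model> ov_model = core.read_model("model.onnx");
  std::map<std::string, ov::PartialShape> shapes;
  // Fix the batch dimension to 1; bound the sequence dimension to [1, 128].
  shapes["input_ids"] = ov::PartialShape{1, ov::Dimension(1, 128)};
  ov_model->reshape(shapes);
  return 0;
}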
53 changes: 48 additions & 5 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -96,6 +96,7 @@
} else if (!session_context_.has_external_weights &&
!subgraph_context_.has_dynamic_input_shape &&
!session_context_.so_context_enable &&
session_context_.shape.empty() &&
auto_unified_compile) {
// Unified OV compile_model is efficient when ov model caching is enabled
// Unified OV compile_model API is supported with AUTO from version 2024.3 and above
@@ -418,9 +419,20 @@
(it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
ov_tensor_data_t ov_tensor_data;
const auto& input = ov_input_info.at(input_idx);
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));

if (!session_context_.shape.empty()) {
ov::PartialShape partial_shape = input.get_partial_shape();
const auto& ort_dims = tensor.GetTensorTypeAndShapeInfo().GetShape();
ValidateOrtDimsAgainstPartialShape(ort_dims, partial_shape);
ov::Shape concrete_shape;
for (size_t i = 0; i < ort_dims.size(); ++i) {
concrete_shape.push_back(ort_dims[i]);
}
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), concrete_shape,
const_cast<void*>(tensor.GetTensorRawData()));
} else {
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input.get_element_type(), input.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));
}
ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

Expand All @@ -434,6 +446,10 @@
}
} // Loop subgraph original input names

if (!session_context_.shape.empty()) {
infer_request->Infer();
}

if (session_context_.device_type.find("NPU") != std::string::npos) {
// Set the output blob as remote blob
auto graph_output_info = exe_network_.Get().outputs();
@@ -465,8 +481,15 @@
ov_tensor_data_t ov_tensor_data;
const auto& output = graph_output_info.at(output_idx);
ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));

if (!session_context_.shape.empty()) {
ov::Tensor output_tensor = infer_request->GetOutputTensor(output_idx);
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output_tensor.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));
} else {
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));
}
ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

try {
@@ -669,6 +692,26 @@
}
}

Check notice on line 695 in onnxruntime/core/providers/openvino/backends/basic_backend.cc (GitHub Actions / cpplint): Add #include <vector> for vector<> [build/include_what_you_use] [4]

void BasicBackend::ValidateOrtDimsAgainstPartialShape(const std::vector<int64_t>& ort_dims,
const ov::PartialShape& partial_shape) const {
// Check if the number of dimensions matches
if (static_cast<int64_t>(ort_dims.size()) != partial_shape.rank().get_length()) {
ORT_THROW("Mismatch in number of dimensions between ORT tensor and OpenVINO PartialShape.");
}
// Validate each dimension
for (size_t i = 0; i < ort_dims.size(); ++i) {
const auto& ov_dim = partial_shape[i]; // OpenVINO dimension at index i
int64_t ort_dim = ort_dims[i]; // ORT dimension at index i

// Check if the ORT dimension is within the specified range
int64_t min_dim = ov_dim.get_min_length();
int64_t max_dim = ov_dim.get_max_length();
if (ort_dim < min_dim || ort_dim > max_dim) {
ORT_THROW("ORT dimension " + std::to_string(ort_dim) + " is out of the allowed range [" +
std::to_string(min_dim) + ", " + std::to_string(max_dim) + "]");
}
}
}

void BasicBackend::Infer(OrtKernelContext* ctx) {
// Preliminary Thread safety mechanism
// currently allows a maximum of 8 Infer request's to parallel execute at the same time
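A small sketch of the bounds semantics used above, with hypothetical values: an ov::Dimension constructed as (1, 128) accepts any concrete length in [1, 128], and get_min_length()/get_max_length() recover the bounds the EP checks against.

#include <cstdint>
#include <iostream>
#include <openvino/core/dimension.hpp>

int main() {
  ov::Dimension dim(1, 128);  // bounded dimension, as produced by a "1..128" range
  int64_t ort_dim = 64;       // hypothetical concrete length from the ORT tensor
  bool in_range = ort_dim >= dim.get_min_length() && ort_dim <= dim.get_max_length();
  std::cout << (in_range ? "accepted" : "rejected") << "\n";  // prints "accepted"
  return 0;
}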
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -51,6 +51,8 @@ class BasicBackend : public IBackend {
void EnableStreams();
void SetNumThreads(ov::AnyMap& device_config);
void StartAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
void ValidateOrtDimsAgainstPartialShape(const std::vector<int64_t>& ort_dims,
const ov::PartialShape& partial_shape) const;

#ifdef IO_BUFFER_ENABLED
void StartRemoteAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/contexts.h
@@ -61,6 +61,7 @@ struct SharedContext {
};

using config_t = std::map<std::string, ov::AnyMap>;
using shape_t = std::map<std::string, ov::PartialShape>;

struct ProviderInfo {
std::string device_type{""}; // [device_type]: Overrides the accelerator hardware type and
@@ -74,6 +75,7 @@
uint32_t num_of_threads{0}; // [num_of_threads]: Overrides the accelerator default value of
// number of threads with this value at runtime.
config_t load_config{}; // JSON config map to load custom OV parameters.
shape_t shape{}; // [reshape_input]: Maps input names to fixed or bounded shapes used to reshape the OV model.
fs::path cache_dir{""}; // [cache_dir]: specify the path to
// dump and load the blobs for the model caching/kernel caching
// (GPU) feature. If blob files are already present,
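A shape_t value is therefore just a name-to-PartialShape map. A sketch with hypothetical input names:

// Hypothetical shape_t for a two-input model: batch fixed at 1,
// second dimension bounded to [1, 128] for both inputs.
shape_t shape{
    {"input_ids", ov::PartialShape{1, ov::Dimension(1, 128)}},
    {"attention_mask", ov::PartialShape{1, ov::Dimension(1, 128)}}};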
93 changes: 92 additions & 1 deletion onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -148,7 +148,7 @@
<< "Update the 'device_type' to specified types 'CPU', 'GPU', 'GPU.0', "
<< "'GPU.1', 'NPU' or from"
<< " HETERO/MULTI/AUTO options and set 'precision' separately. \n";
int delimit = device_type.find("_");
auto delimit = device_type.find("_");
device_type = device_type.substr(0, delimit);
return device_type.substr(delimit + 1);
}
@@ -212,6 +212,97 @@

pi.precision = ParsePrecision(provider_options, pi.device_type, "precision");

if (provider_options.contains("reshape_input") && pi.device_type == "NPU") {
auto parse_input_shapes = [&](const std::string& reshape_input_definition) {
std::map<std::string, ov::PartialShape> parsed_shape_map;
std::string unparsed_definition = reshape_input_definition;

while (!unparsed_definition.empty()) {
// Find the next shape definition bracket
auto shape_start_bracket = unparsed_definition.find_first_of('[');
if (shape_start_bracket == std::string::npos) {
ORT_THROW("Malformed input: missing opening bracket '[' in: " + unparsed_definition);
}
// Extract the tensor name
std::string tensor_name = unparsed_definition.substr(0, shape_start_bracket);
// Remove leading/trailing whitespace
tensor_name.erase(0, tensor_name.find_first_not_of(" \t"));
tensor_name.erase(tensor_name.find_last_not_of(" \t") + 1);

if (tensor_name.empty()) {
ORT_THROW("Empty tensor name provided in rehsape_input parameter");
}

// Closing bracket for current shape definition
auto shape_end_bracket = unparsed_definition.find_first_of(']', shape_start_bracket);

if (shape_end_bracket == std::string::npos || shape_end_bracket < shape_start_bracket) {
ORT_THROW("Missing closing bracket ']' for tensor: " + tensor_name);
}

// Extract shape dimensions string
std::string shape_dimension_str = unparsed_definition.substr(shape_start_bracket + 1,
shape_end_bracket - shape_start_bracket - 1);
std::vector<ov::Dimension> dimension_values;
std::stringstream dimension_stream(shape_dimension_str);
std::string dimension_token;

while (std::getline(dimension_stream, dimension_token, ',')) {
// Remove leading/trailing whitespace
dimension_token.erase(0, dimension_token.find_first_not_of(" \t"));
dimension_token.erase(dimension_token.find_last_not_of(" \t") + 1);

// Check if dimension is a range
size_t range_separator_pos = dimension_token.find("..");
if (range_separator_pos != std::string::npos) {
std::string range_start_str = dimension_token.substr(0, range_separator_pos);
std::string range_end_str = dimension_token.substr(range_separator_pos + 2);

// Remove leading/trailing whitespace
range_start_str.erase(0, range_start_str.find_first_not_of(" \t"));
range_start_str.erase(range_start_str.find_last_not_of(" \t") + 1);
range_end_str.erase(0, range_end_str.find_first_not_of(" \t"));
range_end_str.erase(range_end_str.find_last_not_of(" \t") + 1);

if (range_start_str.empty() || range_end_str.empty() ||
!std::all_of(range_start_str.begin(), range_start_str.end(), ::isdigit) ||
!std::all_of(range_end_str.begin(), range_end_str.end(), ::isdigit)) {
ORT_THROW("Invalid dimension range format: " + dimension_token + " for tensor: " + tensor_name);
}

int range_start = std::stoi(range_start_str);
int range_end = std::stoi(range_end_str);

if (range_start > range_end) {
ORT_THROW("Invalid dimension range (start > end) for tensor: " + tensor_name);
}

dimension_values.emplace_back(ov::Dimension(range_start, range_end));
} else {
// Handle single dimension value
if (dimension_token.empty() ||
!std::all_of(dimension_token.begin(), dimension_token.end(), ::isdigit)) {
ORT_THROW("Invalid dimension value: " + dimension_token + " for tensor: " + tensor_name);
}
dimension_values.emplace_back(std::stoi(dimension_token));
}
}

// Store parsed shape in result map
parsed_shape_map[tensor_name] = ov::PartialShape(dimension_values);
// Update remaining unparsed string
unparsed_definition = unparsed_definition.substr(shape_end_bracket + 1);
if (!unparsed_definition.empty() && unparsed_definition.front() == ',') {
unparsed_definition = unparsed_definition.substr(1);
}
// Remove leading whitespace
unparsed_definition.erase(0, unparsed_definition.find_first_not_of(" \t"));
}
return parsed_shape_map;
};
pi.shape = parse_input_shapes(provider_options.at("reshape_input"));
}

if (provider_options.contains("load_config")) {
auto parse_config = [&](const std::string& config_str) -> std::map<std::string, ov::AnyMap> {
// If the config string is empty, return an empty map and skip processing
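Putting the parser together: reshape_input accepts one or more name[dims] entries separated by commas, where each dim is either a fixed integer or a min..max range. A hedged end-to-end sketch via the C++ API; AppendExecutionProvider_OpenVINO_V2 is the existing options-map entry point, while the model path and tensor names are placeholders:

#include <string>
#include <unordered_map>
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env;
  Ort::SessionOptions session_options;
  std::unordered_map<std::string, std::string> ov_options;
  ov_options["device_type"] = "NPU";  // this PR only parses reshape_input for NPU
  // Batch fixed at 1; sequence length bounded to [1, 128] for both inputs.
  ov_options["reshape_input"] = "input_ids[1,1..128],attention_mask[1,1..128]";
  session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
  Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);
  return 0;
}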
12 changes: 11 additions & 1 deletion onnxruntime/core/providers/openvino/ov_interface.cc
@@ -211,6 +211,16 @@ OVTensorPtr OVInferRequest::GetTensor(const std::string& input_name) {
}
}

OVTensor OVInferRequest::GetOutputTensor(const int& output_idx) {
try {
return ovInfReq.get_output_tensor(output_idx);
} catch (const Exception& e) {
ORT_THROW(log_tag + " Cannot access output tensor: " + e.what());
} catch (...) {
ORT_THROW(log_tag + " Cannot access output tensor");
}
}

std::string OVInferRequest::GetInputTensorName(uint32_t index) {
try {
const auto& model = ovInfReq.get_compiled_model();
@@ -233,7 +243,7 @@ void OVInferRequest::SetTensor(const std::string& name, OVTensorPtr& blob) {
}

uint32_t OVInferRequest::GetNumInputs() {
return ovInfReq.get_compiled_model().inputs().size();
return static_cast<uint32_t>(ovInfReq.get_compiled_model().inputs().size());
}

void OVInferRequest::StartAsync() {
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/ov_interface.h
@@ -91,6 +91,7 @@ class OVInferRequest {
OVTensorPtr GetTensor(const std::string& name);
std::string GetInputTensorName(uint32_t index);
void SetTensor(const std::string& name, OVTensorPtr& blob);
OVTensor GetOutputTensor(const int& output_idx);
void StartAsync();
void Infer();
void WaitRequest();
2 changes: 2 additions & 0 deletions onnxruntime/test/perftest/ort_test_session.cc
@@ -787,6 +787,8 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
}
} else if (key == "device_memory_name") {
device_memory_name_ = std::move(value);
} else if (key == "reshape_input") {
ov_options[key] = value;
} else {
ORT_THROW(
"[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO."
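With this pass-through in place, the option should be reachable from onnxruntime_perf_test via the usual key|value runtime-options flag, e.g. -e openvino -i "reshape_input|input_ids[1,1..128]" (the -i syntax is the perf-test convention for EP options; the tensor name here is a placeholder).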