Skip to content

Commit 36bb9e3

Browse files
feat(providers): add DeepInfra as a built-in inference provider (#1902)
* feat(providers): add DeepInfra as a built-in inference provider (v2 only) - Adds `deepinfra` as a built-in Providers v2 profile (`providers/deepinfra.yaml`) with inference category, Bearer auth, and `DEEPINFRA_API_KEY` discovery - Adds `DEEPINFRA_PROFILE` to inference routing so `inference.local` works with the `deepinfra` provider type - Fixes `build_backend_url` to strip `/v1` from request paths when the base URL contains `/v1/` as an internal segment (e.g. `api.deepinfra.com/v1/openai`), preventing double-versioned paths like `.../v1/openai/v1/chat/completions` - Updates `docs/sandboxes/providers-v2.mdx` and `docs/sandboxes/manage-providers.mdx` with DeepInfra entries; removes the old v1 workaround row that used `openai` type with `OPENAI_API_KEY` Signed-off-by: Milos Milutinovic <codemastermilos@gmail.com> * fix(providers): address gator review findings for DeepInfra provider - Narrow build_backend_url /v1 dedupe to URLs whose path component is exactly /v1 or starts with /v1/ — prevents regression on proxy endpoints where /v1 is buried deeper (e.g. /api/v1/openai); add regression test for the nested proxy path case - Add deepinfra provider plugin with DEEPINFRA_API_KEY discovery, registered in ProviderRegistry so known_types() and TUI include it - Add deepinfra to unsupported-inference-provider error message in openshell-server for accurate user-facing debugging guidance - Add deepinfra to openai_compatible_profiles_include_embeddings test to lock in the OpenAI-compatible protocol contract Signed-off-by: Milos Milutinovic <codemastermilos@gmail.com> * fix(router): handle /v1 as final path segment in build_backend_url dedup Extends the /v1 deduplication logic to also strip /v1 from request paths when the base URL's path ends with /v1 (e.g. https://api.groq.com/openai/v1). The previous fix only matched paths starting with /v1/, which regressed providers like Groq whose base path has /v1 as the last segment rather than the first. The nested-proxy exclusion (e.g. 
/api/v1/openai) is preserved since /v1 appears in the middle — neither first nor last segment. Adds a regression test for the Groq-style base URL. Signed-off-by: Milos Milutinovic <codemastermilos@gmail.com> * fix(providers): add deepinfra telemetry bucket and update profile list test - Add DeepInfra variant to ProviderProfile telemetry enum and from_raw() mapping so deepinfra providers are tracked in their own bucket rather than falling through to Custom - Map deepinfra in telemetry_provider_profile() in openshell-server - Add deepinfra to list_provider_profiles_returns_built_in_profile_categories test (sorted between cursor and github) - Update architecture/gateway.md inference provider list to include deepinfra Signed-off-by: Milos Milutinovic <codemastermilos@gmail.com> * style(router): apply cargo fmt to backend.rs Signed-off-by: Milos Milutinovic <codemastermilos@gmail.com> --------- Signed-off-by: Milos Milutinovic <codemastermilos@gmail.com>
1 parent ff028ce commit 36bb9e3

13 files changed

Lines changed: 131 additions & 5 deletions

File tree

architecture/gateway.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ Cluster inference routes store only `provider_name`, `model_id`, and optional
265265
timeout. The gateway resolves endpoint URLs, protocols, credentials, auth
266266
style, and route-shaping metadata from the provider record when supervisors call
267267
`GetInferenceBundle`. Supported provider types for cluster inference are
268-
`openai`, `anthropic`, `nvidia`, and `google-vertex-ai`.
268+
`openai`, `anthropic`, `nvidia`, `deepinfra`, and `google-vertex-ai`.
269269

270270
The bundle carries enough information for sandbox-local routers to construct
271271
upstream URLs without re-deriving provider-specific routing logic. Each resolved

crates/openshell-core/src/inference.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,17 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
155155
passthrough_headers: &["x-model-id"],
156156
};
157157

158+
static DEEPINFRA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
159+
provider_type: "deepinfra",
160+
default_base_url: "https://api.deepinfra.com/v1/openai",
161+
protocols: OPENAI_PROTOCOLS,
162+
credential_key_names: &["DEEPINFRA_API_KEY"],
163+
base_url_config_keys: &["DEEPINFRA_BASE_URL"],
164+
auth: AuthHeader::Bearer,
165+
default_headers: &[],
166+
passthrough_headers: &["x-model-id"],
167+
};
168+
158169
/// Canonicalize an inference provider type string to a well-known identifier.
159170
///
160171
/// Returns `Some(canonical_name)` for recognized inference providers,
@@ -167,6 +178,7 @@ pub fn normalize_inference_provider_type(input: &str) -> Option<&'static str> {
167178
"openai" => Some("openai"),
168179
"anthropic" => Some("anthropic"),
169180
"nvidia" => Some("nvidia"),
181+
"deepinfra" => Some("deepinfra"),
170182
"google-vertex-ai" | "vertex" | "vertex-ai" | "google-vertex" | "gcp-vertex" => {
171183
Some("google-vertex-ai")
172184
}
@@ -183,6 +195,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf
183195
"openai" => Some(&OPENAI_PROFILE),
184196
"anthropic" => Some(&ANTHROPIC_PROFILE),
185197
"nvidia" => Some(&NVIDIA_PROFILE),
198+
"deepinfra" => Some(&DEEPINFRA_PROFILE),
186199
"google-vertex-ai" => Some(&VERTEX_AI_PROFILE),
187200
_ => None,
188201
}
@@ -303,12 +316,24 @@ mod tests {
303316
assert!(profile_for("openai").is_some());
304317
assert!(profile_for("anthropic").is_some());
305318
assert!(profile_for("nvidia").is_some());
319+
assert!(profile_for("deepinfra").is_some());
306320
assert!(profile_for("OpenAI").is_some()); // case insensitive
307321
}
308322

323+
#[test]
324+
fn profile_for_deepinfra() {
325+
let profile = profile_for("deepinfra").expect("deepinfra profile should exist");
326+
assert_eq!(profile.provider_type, "deepinfra");
327+
assert_eq!(
328+
profile.default_base_url,
329+
"https://api.deepinfra.com/v1/openai"
330+
);
331+
assert_eq!(profile.auth, AuthHeader::Bearer);
332+
}
333+
309334
#[test]
310335
fn openai_compatible_profiles_include_embeddings() {
311-
for provider_type in ["openai", "nvidia"] {
336+
for provider_type in ["openai", "nvidia", "deepinfra"] {
312337
let profile = profile_for(provider_type).expect("provider profile should exist");
313338
assert!(
314339
profile.protocols.contains(&"openai_embeddings"),

crates/openshell-core/src/telemetry.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ pub enum ProviderProfile {
205205
Claude,
206206
Codex,
207207
Copilot,
208+
Deepinfra,
208209
Github,
209210
Gitlab,
210211
Nvidia,
@@ -222,6 +223,7 @@ impl ProviderProfile {
222223
Self::Claude => "claude",
223224
Self::Codex => "codex",
224225
Self::Copilot => "copilot",
226+
Self::Deepinfra => "deepinfra",
225227
Self::Github => "github",
226228
Self::Gitlab => "gitlab",
227229
Self::Nvidia => "nvidia",
@@ -239,6 +241,7 @@ impl ProviderProfile {
239241
"claude" | "claude-code" => Self::Claude,
240242
"codex" => Self::Codex,
241243
"copilot" => Self::Copilot,
244+
"deepinfra" => Self::Deepinfra,
242245
"github" | "gh" => Self::Github,
243246
"gitlab" | "glab" => Self::Gitlab,
244247
"nvidia" => Self::Nvidia,

crates/openshell-providers/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ impl ProviderRegistry {
116116
registry.register(providers::openai::SPEC);
117117
registry.register(providers::anthropic::SPEC);
118118
registry.register(providers::nvidia::SPEC);
119+
registry.register(providers::deepinfra::SPEC);
119120
registry.register(providers::gitlab::SPEC);
120121
registry.register(providers::github::SPEC);
121122
registry.register(providers::outlook::OutlookProvider);

crates/openshell-providers/src/profiles.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ const BUILT_IN_PROFILE_YAMLS: &[&str] = &[
2424
include_str!("../../../providers/codex.yaml"),
2525
include_str!("../../../providers/copilot.yaml"),
2626
include_str!("../../../providers/cursor.yaml"),
27+
include_str!("../../../providers/deepinfra.yaml"),
2728
include_str!("../../../providers/github.yaml"),
2829
include_str!("../../../providers/google-vertex-ai.yaml"),
2930
include_str!("../../../providers/nvidia.yaml"),
crates/openshell-providers/src/providers/deepinfra.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
use crate::ProviderDiscoverySpec;
5+
6+
pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec {
7+
id: "deepinfra",
8+
credential_env_vars: &["DEEPINFRA_API_KEY"],
9+
};
10+
11+
test_discovers_env_credential!(
12+
discovers_deepinfra_env_credentials,
13+
"DEEPINFRA_API_KEY",
14+
"di-test-123"
15+
);

crates/openshell-providers/src/providers/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ pub mod anthropic;
3434
pub mod claude;
3535
pub mod codex;
3636
pub mod copilot;
37+
pub mod deepinfra;
3738
pub mod generic;
3839
pub mod github;
3940
pub mod gitlab;

crates/openshell-router/src/backend.rs

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,21 @@ fn build_provider_url(
768768

769769
fn build_backend_url(endpoint: &str, path: &str) -> String {
770770
let base = endpoint.trim_end_matches('/');
771-
if base.ends_with("/v1") && (path == "/v1" || path.starts_with("/v1/")) {
771+
// Strip the /v1 prefix from the request path when the base URL's path
772+
// component has /v1 as its first segment (e.g. openai/nvidia: "/v1",
773+
// deepinfra: "/v1/openai") or its final segment (e.g. groq:
774+
// "/openai/v1"). This covers all known provider shapes while preserving
775+
// the full path for proxy endpoints where /v1 is buried in the middle
776+
// (e.g. "https://proxy.example/api/v1/openai" → path "/api/v1/openai",
777+
// neither first nor last segment).
778+
let base_path_has_v1_edge_segment = base
779+
.find("://")
780+
.and_then(|i| base[i + 3..].find('/').map(|j| i + 3 + j))
781+
.is_some_and(|path_start| {
782+
let base_path = &base[path_start..];
783+
base_path.starts_with("/v1/") || base_path.ends_with("/v1")
784+
});
785+
if base_path_has_v1_edge_segment && (path == "/v1" || path.starts_with("/v1/")) {
772786
return format!("{base}{}", &path[3..]);
773787
}
774788

@@ -831,6 +845,44 @@ mod tests {
831845
);
832846
}
833847

848+
#[test]
849+
fn build_backend_url_dedupes_v1_for_base_with_v1_subpath() {
850+
// DeepInfra's base path starts with /v1/ (/v1/openai), so the leading /v1
851+
// of the request path must be stripped to avoid a doubled version segment.
852+
assert_eq!(
853+
build_backend_url(
854+
"https://api.deepinfra.com/v1/openai",
855+
"/v1/chat/completions"
856+
),
857+
"https://api.deepinfra.com/v1/openai/chat/completions"
858+
);
859+
}
860+
861+
#[test]
862+
fn build_backend_url_dedupes_v1_for_base_ending_with_v1() {
863+
// Providers like Groq use a base URL where /v1 is the final segment
864+
// below a non-root prefix (e.g. /openai/v1). The /v1 in the request
865+
// path must still be stripped so it is not doubled.
866+
assert_eq!(
867+
build_backend_url("https://api.groq.com/openai/v1", "/v1/chat/completions"),
868+
"https://api.groq.com/openai/v1/chat/completions"
869+
);
870+
}
871+
872+
#[test]
873+
fn build_backend_url_preserves_v1_for_nested_proxy_path() {
874+
// A proxy whose base path has /v1 buried in the middle (neither first
875+
// nor last segment) must NOT have /v1 stripped — the full request
876+
// path must be appended so the upstream receives the correct prefix.
877+
assert_eq!(
878+
build_backend_url(
879+
"https://proxy.example/api/v1/openai",
880+
"/v1/chat/completions"
881+
),
882+
"https://proxy.example/api/v1/openai/v1/chat/completions"
883+
);
884+
}
885+
834886
fn test_route(endpoint: &str, protocols: &[&str], auth: AuthHeader) -> ResolvedRoute {
835887
ResolvedRoute {
836888
name: "inference.local".to_string(),

crates/openshell-server/src/grpc/provider.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2148,6 +2148,7 @@ fn telemetry_provider_profile(provider_type: &str) -> TelemetryProviderProfile {
21482148
Some("claude" | "claude-code") => TelemetryProviderProfile::Claude,
21492149
Some("codex") => TelemetryProviderProfile::Codex,
21502150
Some("copilot") => TelemetryProviderProfile::Copilot,
2151+
Some("deepinfra") => TelemetryProviderProfile::Deepinfra,
21512152
Some("github") => TelemetryProviderProfile::Github,
21522153
Some("gitlab") => TelemetryProviderProfile::Gitlab,
21532154
Some("nvidia") => TelemetryProviderProfile::Nvidia,
@@ -2646,6 +2647,7 @@ mod tests {
26462647
"codex",
26472648
"copilot",
26482649
"cursor",
2650+
"deepinfra",
26492651
"github",
26502652
"google-vertex-ai",
26512653
"nvidia",

crates/openshell-server/src/inference.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,7 @@ fn resolve_provider_route(
620620
let profile = openshell_core::inference::profile_for(&provider_type).ok_or_else(|| {
621621
Status::invalid_argument(format!(
622622
"provider '{name}' has unsupported type '{raw_provider_type}' for cluster inference \
623-
(supported: openai, anthropic, nvidia, google-vertex-ai)",
623+
(supported: openai, anthropic, nvidia, deepinfra, google-vertex-ai)",
624624
name = provider.object_name()
625625
))
626626
})?;

0 commit comments

Comments
 (0)