Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions Sources/ManifoldContract/EmbeddingBackend.swift
Original file line number Diff line number Diff line change
@@ -1,13 +1,71 @@
import Foundation

/// Static capabilities advertised by an ``EmbeddingBackend``.
///
/// Every field has a safe default, so a backend that advertises nothing
/// specific reports ``EmbeddingCapabilities/default``. Consumers should treat a
/// `nil` bound as "unspecified / unbounded" rather than zero.
///
/// Decoding honours the same defaults: a key that is absent (or `null`) in the
/// payload falls back to the value the memberwise initializer would use, so an
/// empty object decodes to ``EmbeddingCapabilities/default`` and payloads that
/// omit a field still decode instead of throwing `keyNotFound`.
public struct EmbeddingCapabilities: Sendable, Codable, Equatable, Hashable {
    /// Maximum number of texts accepted in a single ``EmbeddingBackend/embed(_:)``
    /// call. `nil` means unspecified / unbounded.
    public var maxBatchSize: Int?

    /// Maximum number of tokens per input text. `nil` means unspecified.
    public var maxInputLength: Int?

    /// Whether the backend returns L2-normalized (unit-length) vectors. Defaults
    /// to `false`; consumers that require normalized vectors must normalize
    /// themselves when this is `false`.
    public var producesNormalizedVectors: Bool

    /// Creates a capability set; every argument defaults to the conservative
    /// value (no bounds advertised, vectors not normalized).
    ///
    /// - Parameters:
    ///   - maxBatchSize: Upper bound on texts per call, or `nil` if unspecified.
    ///   - maxInputLength: Upper bound on tokens per text, or `nil` if unspecified.
    ///   - producesNormalizedVectors: `true` if returned vectors are unit-length.
    public init(
        maxBatchSize: Int? = nil,
        maxInputLength: Int? = nil,
        producesNormalizedVectors: Bool = false
    ) {
        self.maxBatchSize = maxBatchSize
        self.maxInputLength = maxInputLength
        self.producesNormalizedVectors = producesNormalizedVectors
    }

    /// Decodes a capability set, substituting the documented default for any
    /// key that is missing or `null`.
    ///
    /// The synthesized implementation would reject a payload lacking
    /// `producesNormalizedVectors`, contradicting the "safe defaults" contract.
    ///
    /// - Throws: `DecodingError` if the payload is not a keyed container or a
    ///   present value has the wrong type.
    public init(from decoder: Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        let batchLimit = try values.decodeIfPresent(Int.self, forKey: .maxBatchSize)
        let inputLimit = try values.decodeIfPresent(Int.self, forKey: .maxInputLength)
        let normalized = try values.decodeIfPresent(Bool.self, forKey: .producesNormalizedVectors)
        self.init(
            maxBatchSize: batchLimit,
            maxInputLength: inputLimit,
            producesNormalizedVectors: normalized ?? false
        )
    }

    /// The conservative default: no advertised bounds, vectors not normalized.
    public static let `default` = EmbeddingCapabilities()

    // Declared explicitly so the hand-written `init(from:)` and the synthesized
    // `encode(to:)` share one set of keys (identical to the property names).
    private enum CodingKeys: String, CodingKey {
        case maxBatchSize
        case maxInputLength
        case producesNormalizedVectors
    }
}

/// A class-bound, `Sendable` source of text embeddings.
///
/// The requirements cover model lifecycle (``loadModel(from:)`` /
/// ``unloadModel()``), introspection (``isModelLoaded``, ``dimensions``,
/// ``capabilities``) and the embedding call itself (``embed(_:)``).
public protocol EmbeddingBackend: AnyObject, Sendable {
    /// `true` when a model is loaded, i.e. after a successful
    /// ``loadModel(from:)``. ``dimensions`` is only meaningful in that state.
    var isModelLoaded: Bool { get }

    /// Length of every vector returned by ``embed(_:)``.
    ///
    /// Reliable only while ``isModelLoaded`` is `true`, that is, after
    /// ``loadModel(from:)`` has succeeded. What a backend reports before then
    /// is up to the backend: `0`, a placeholder, or a model-independent
    /// default are all permitted.
    var dimensions: Int { get }

    /// What this backend statically advertises: batch and input bounds, and
    /// whether its vectors are normalized.
    ///
    /// Conformers may leave this unimplemented; a protocol extension supplies
    /// ``EmbeddingCapabilities/default``.
    var capabilities: EmbeddingCapabilities { get }

    /// Loads the model found at `url`.
    ///
    /// - Parameter url: Location of the model to load.
    ///   NOTE(review): presumably a local file URL — confirm per backend.
    /// - Throws: A backend-defined error when loading fails.
    func loadModel(from url: URL) async throws

    /// Embeds each input text into a vector.
    ///
    /// - Parameter texts: The texts to embed.
    /// - Returns: One vector per input, in a result whose `count` equals
    ///   `texts.count`; every vector has ``dimensions`` elements.
    /// - Throws: ``EmbeddingError/dimensionMismatch(expected:actual:)`` when the
    ///   backend cannot uphold that postcondition.
    func embed(_ texts: [String]) async throws -> [[Float]]

    /// Releases the currently loaded model.
    ///
    /// NOTE(review): presumably ``isModelLoaded`` is `false` afterwards —
    /// confirm against each conformer.
    func unloadModel()
}

extension EmbeddingBackend {
    /// Fallback for conformers that do not declare their own `capabilities`:
    /// the conservative ``EmbeddingCapabilities/default``.
    public var capabilities: EmbeddingCapabilities {
        return EmbeddingCapabilities.default
    }
}

public enum EmbeddingError: LocalizedError {
case modelNotLoaded
case dimensionMismatch(expected: Int, actual: Int)
Expand Down
19 changes: 19 additions & 0 deletions Sources/ManifoldContract/InferenceBackend.swift
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,20 @@ public struct LlamaMirostatV2SamplerOptions: Sendable, Codable, Equatable {
}

/// Sampling and generation parameters shared across all inference backends.
///
/// ## Throw vs. silently ignore
///
/// Fields fall into two contractual classes, and backends honour them differently:
///
/// - **Capability-gated guarantees** (e.g. ``GenerationConfig/grammar``) carry a
/// *guarantee*: a backend that cannot honour the request MUST throw the matching
/// ``InferenceError`` (e.g. ``InferenceError/unsupportedGrammar``) rather than
/// silently degrade. Callers rely on the error to know the constraint was not applied.
/// - **Advisory hints** (e.g. ``seed``, ``minP``, ``jsonMode``, the sampler penalties,
/// and the vendor knobs below) are best-effort: backends that do not support a hint
/// silently ignore it. A missing hint is never an error.
///
/// See each field's own documentation for the per-backend specifics.
public struct GenerationConfig: Sendable, Codable {
public var temperature: Float
public var topP: Float
Expand Down Expand Up @@ -149,6 +163,11 @@ public struct GenerationConfig: Sendable, Codable {
/// ``presenceContextSize`` for why llama.cpp ignores it.
public var frequencyContextSize: Int?

// The three `llama*` fields below are backend-specific knobs living on the
// shared GenerationConfig type. This is accepted tech debt: relocating them
// into the companion llama package is deferred (cross-repo blast radius) and
// tracked in #1834.

/// llama.cpp DRY repetition sampler options.
///
/// `nil` (the default) preserves the backend's existing sampler chain. When
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,24 @@ final class EmbeddingBackendProtocolTests: XCTestCase {
XCTAssertNotNil(desc)
XCTAssertFalse(desc!.isEmpty)
}

// MARK: - EmbeddingCapabilities

/// The mock backend declares no `capabilities` of its own, so reading the
/// property must yield the protocol-extension fallback. This shows that
/// adding the requirement did not break existing conformers.
func test_capabilities_defaultsToDefault_whenNotOverridden() {
    let inherited = MockEmbeddingBackend().capabilities
    XCTAssertEqual(inherited, EmbeddingCapabilities.default)
}

/// Encoding a fully populated value to JSON and decoding it again must
/// reproduce an equal value.
func test_embeddingCapabilities_codableRoundTrips() throws {
    let expected = EmbeddingCapabilities(maxBatchSize: 32, maxInputLength: 512, producesNormalizedVectors: true)

    let json = try JSONEncoder().encode(expected)
    let roundTripped = try JSONDecoder().decode(EmbeddingCapabilities.self, from: json)

    XCTAssertEqual(roundTripped, expected)
}
}