diff --git a/Sources/ManifoldContract/EmbeddingBackend.swift b/Sources/ManifoldContract/EmbeddingBackend.swift
index 372a1bca..8ff2f813 100644
--- a/Sources/ManifoldContract/EmbeddingBackend.swift
+++ b/Sources/ManifoldContract/EmbeddingBackend.swift
@@ -1,13 +1,84 @@
 import Foundation
 
+/// Static capabilities advertised by an ``EmbeddingBackend``.
+///
+/// All fields have safe defaults, so a backend that does not advertise anything
+/// specific reports ``EmbeddingCapabilities/default``. Consumers should treat a
+/// `nil` bound as "unspecified / unbounded" rather than zero.
+public struct EmbeddingCapabilities: Sendable, Codable, Equatable, Hashable {
+    /// Maximum number of texts accepted in a single ``EmbeddingBackend/embed(_:)``
+    /// call. `nil` means unspecified / unbounded.
+    public var maxBatchSize: Int?
+
+    /// Maximum number of tokens per input text. `nil` means unspecified.
+    public var maxInputLength: Int?
+
+    /// Whether the backend returns L2-normalized (unit-length) vectors. Defaults
+    /// to `false`; consumers that require normalized vectors must normalize
+    /// themselves when this is `false`.
+    public var producesNormalizedVectors: Bool
+
+    public init(
+        maxBatchSize: Int? = nil,
+        maxInputLength: Int? = nil,
+        producesNormalizedVectors: Bool = false
+    ) {
+        self.maxBatchSize = maxBatchSize
+        self.maxInputLength = maxInputLength
+        self.producesNormalizedVectors = producesNormalizedVectors
+    }
+
+    /// Creates capabilities from an encoded payload.
+    ///
+    /// Every key is optional on the wire: a missing bound decodes as `nil` and a
+    /// missing `producesNormalizedVectors` decodes as `false`, so `{}` decodes to
+    /// ``EmbeddingCapabilities/default`` instead of throwing `keyNotFound`.
+    public init(from decoder: Decoder) throws {
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+        maxBatchSize = try container.decodeIfPresent(Int.self, forKey: .maxBatchSize)
+        maxInputLength = try container.decodeIfPresent(Int.self, forKey: .maxInputLength)
+        producesNormalizedVectors =
+            try container.decodeIfPresent(Bool.self, forKey: .producesNormalizedVectors) ?? false
+    }
+
+    /// The conservative default: no advertised bounds, vectors not normalized.
+    public static let `default` = EmbeddingCapabilities()
+}
+
 public protocol EmbeddingBackend: AnyObject, Sendable {
     var isModelLoaded: Bool { get }
+
+    /// The dimensionality of vectors produced by ``embed(_:)``.
+    ///
+    /// Only meaningful after a successful ``loadModel(from:)`` — i.e. when
+    /// ``isModelLoaded`` is `true`. Behavior before a model is loaded is
+    /// backend-defined (a backend may return `0`, a placeholder, or its
+    /// model-independent default).
     var dimensions: Int { get }
+
+    /// Static capabilities of this backend (batch/input bounds, normalization).
+    ///
+    /// A default implementation returns ``EmbeddingCapabilities/default``, so
+    /// existing conformers need not implement this.
+    var capabilities: EmbeddingCapabilities { get }
+
     func loadModel(from url: URL) async throws
+
+    /// Produces one embedding vector per input text.
+    ///
+    /// - Postcondition: the returned array has exactly one vector per input
+    ///   (`result.count == texts.count`), each of length ``dimensions``. A
+    ///   backend that cannot satisfy this signals the violation by throwing
+    ///   ``EmbeddingError/dimensionMismatch(expected:actual:)``.
     func embed(_ texts: [String]) async throws -> [[Float]]
+
     func unloadModel()
 }
 
+public extension EmbeddingBackend {
+    var capabilities: EmbeddingCapabilities { .default }
+}
+
 public enum EmbeddingError: LocalizedError {
     case modelNotLoaded
     case dimensionMismatch(expected: Int, actual: Int)
diff --git a/Sources/ManifoldContract/InferenceBackend.swift b/Sources/ManifoldContract/InferenceBackend.swift
index a07af6c5..7d9eacf1 100644
--- a/Sources/ManifoldContract/InferenceBackend.swift
+++ b/Sources/ManifoldContract/InferenceBackend.swift
@@ -88,6 +88,20 @@ public struct LlamaMirostatV2SamplerOptions: Sendable, Codable, Equatable {
 }
 
 /// Sampling and generation parameters shared across all inference backends.
+///
+/// ## Throw vs. silently ignore
+///
+/// Fields fall into two contractual classes, and backends honour them differently:
+///
+/// - **Capability-gated guarantees** (e.g. ``GenerationConfig/grammar``) carry a
+///   *guarantee*: a backend that cannot honour the request MUST throw the matching
+///   ``InferenceError`` (e.g. ``InferenceError/unsupportedGrammar``) rather than
+///   silently degrade. Callers rely on the error to know the constraint was not applied.
+/// - **Advisory hints** (e.g. ``seed``, ``minP``, ``jsonMode``, the sampler penalties,
+///   and the vendor knobs below) are best-effort: backends that do not support a hint
+///   silently ignore it. A missing hint is never an error.
+///
+/// See each field's own documentation for the per-backend specifics.
 public struct GenerationConfig: Sendable, Codable {
     public var temperature: Float
     public var topP: Float
@@ -149,6 +163,11 @@ public struct GenerationConfig: Sendable, Codable {
     /// ``presenceContextSize`` for why llama.cpp ignores it.
     public var frequencyContextSize: Int?
 
+    // The three `llama*` fields below are backend-specific knobs living on the
+    // shared GenerationConfig type. This is accepted tech debt: relocating them
+    // into the companion llama package is deferred (cross-repo blast radius) and
+    // tracked in #1834.
+
     /// llama.cpp DRY repetition sampler options.
     ///
     /// `nil` (the default) preserves the backend's existing sampler chain. When
diff --git a/Tests/ManifoldInferenceTests/EmbeddingBackendProtocolTests.swift b/Tests/ManifoldInferenceTests/EmbeddingBackendProtocolTests.swift
index 2aa69d73..38f74368 100644
--- a/Tests/ManifoldInferenceTests/EmbeddingBackendProtocolTests.swift
+++ b/Tests/ManifoldInferenceTests/EmbeddingBackendProtocolTests.swift
@@ -75,4 +75,30 @@ final class EmbeddingBackendProtocolTests: XCTestCase {
         XCTAssertNotNil(desc)
         XCTAssertFalse(desc!.isEmpty)
     }
+
+    // MARK: - EmbeddingCapabilities
+
+    /// A conformer that does not override `capabilities` inherits the
+    /// protocol-extension default — proving the addition is non-breaking.
+    func test_capabilities_defaultsToDefault_whenNotOverridden() {
+        let backend = MockEmbeddingBackend()
+        XCTAssertEqual(backend.capabilities, .default)
+    }
+
+    func test_embeddingCapabilities_codableRoundTrips() throws {
+        let original = EmbeddingCapabilities(
+            maxBatchSize: 32,
+            maxInputLength: 512,
+            producesNormalizedVectors: true
+        )
+        let data = try JSONEncoder().encode(original)
+        let decoded = try JSONDecoder().decode(EmbeddingCapabilities.self, from: data)
+        XCTAssertEqual(decoded, original)
+    }
+
+    /// A payload that omits every key decodes to the advertised defaults.
+    func test_embeddingCapabilities_decodesDefaults_whenKeysAreMissing() throws {
+        let decoded = try JSONDecoder().decode(EmbeddingCapabilities.self, from: Data("{}".utf8))
+        XCTAssertEqual(decoded, .default)
+    }
 }