roryford · roryford · Jun 15, 2026 · Jun 15, 2026
@@ -1,13 +1,71 @@
 import Foundation
 
+/// Static capabilities advertised by an ``EmbeddingBackend``.
+///
+/// All fields have safe defaults, so a backend that does not advertise anything
+/// specific reports ``EmbeddingCapabilities/default``. Consumers should treat a
+/// `nil` bound as "unspecified / unbounded" rather than zero.
+///
+/// Decoding is lenient: a key that is absent from the payload falls back to the
+/// same default the initializer uses, so an empty object (`{}`) decodes to
+/// ``EmbeddingCapabilities/default``. A key that is present with the wrong type
+/// still fails to decode.
+public struct EmbeddingCapabilities: Sendable, Codable, Equatable, Hashable {
+    /// Maximum number of texts accepted in a single ``EmbeddingBackend/embed(_:)``
+    /// call. `nil` means unspecified / unbounded.
+    public var maxBatchSize: Int?
+
+    /// Maximum number of tokens per input text. `nil` means unspecified /
+    /// unbounded.
+    public var maxInputLength: Int?
+
+    /// Whether the backend returns L2-normalized (unit-length) vectors. Defaults
+    /// to `false`; consumers that require normalized vectors must normalize
+    /// themselves when this is `false`.
+    public var producesNormalizedVectors: Bool
+
+    /// Coding keys mirror the property names, so the encoded representation is
+    /// the same one the compiler-synthesized conformance would produce.
+    private enum CodingKeys: String, CodingKey {
+        case maxBatchSize
+        case maxInputLength
+        case producesNormalizedVectors
+    }
+
+    /// Creates a capabilities value.
+    ///
+    /// - Parameters:
+    ///   - maxBatchSize: Upper bound on texts per `embed` call, or `nil` for
+    ///     unspecified / unbounded.
+    ///   - maxInputLength: Upper bound on tokens per input text, or `nil` for
+    ///     unspecified / unbounded.
+    ///   - producesNormalizedVectors: Whether returned vectors are L2-normalized.
+    public init(
+        maxBatchSize: Int? = nil,
+        maxInputLength: Int? = nil,
+        producesNormalizedVectors: Bool = false
+    ) {
+        self.maxBatchSize = maxBatchSize
+        self.maxInputLength = maxInputLength
+        self.producesNormalizedVectors = producesNormalizedVectors
+    }
+
+    /// Decodes a capabilities value, substituting the initializer defaults for
+    /// any key that is missing (or explicitly `null`).
+    ///
+    /// The synthesized decoder would reject a payload without
+    /// `producesNormalizedVectors` even though the two optional bounds may be
+    /// omitted; decoding every field with `decodeIfPresent` keeps the three
+    /// fields consistent.
+    ///
+    /// - Throws: `DecodingError` when the payload is not a keyed container or a
+    ///   present key holds a value of the wrong type.
+    public init(from decoder: Decoder) throws {
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+        let batch = try container.decodeIfPresent(Int.self, forKey: .maxBatchSize)
+        let inputLength = try container.decodeIfPresent(Int.self, forKey: .maxInputLength)
+        let normalized = try container.decodeIfPresent(Bool.self, forKey: .producesNormalizedVectors)
+        self.init(
+            maxBatchSize: batch,
+            maxInputLength: inputLength,
+            producesNormalizedVectors: normalized ?? false
+        )
+    }
+
+    /// The conservative default: no advertised bounds, vectors not normalized.
+    public static let `default` = EmbeddingCapabilities()
+}
+
+/// A reference-type (`AnyObject`), `Sendable` backend that loads an embedding
+/// model and turns input texts into float vectors.
 public protocol EmbeddingBackend: AnyObject, Sendable {
+    /// Whether a model is loaded — `true` after a successful
+    /// ``loadModel(from:)``.
+    ///
+    /// NOTE(review): presumably `false` again after ``unloadModel()``; confirm
+    /// per backend.
     var isModelLoaded: Bool { get }
+
+    /// The dimensionality of vectors produced by ``embed(_:)``.
+    ///
+    /// Only meaningful after a successful ``loadModel(from:)`` — i.e. when
+    /// ``isModelLoaded`` is `true`. Behavior before a model is loaded is
+    /// backend-defined (a backend may return `0`, a placeholder, or its
+    /// model-independent default).
     var dimensions: Int { get }
+
+    /// Static capabilities of this backend (batch/input bounds, normalization).
+    ///
+    /// A default implementation returns ``EmbeddingCapabilities/default``, so
+    /// existing conformers need not implement this.
+    var capabilities: EmbeddingCapabilities { get }
+
+    /// Loads the model located at `url`.
+    ///
+    /// After a successful load ``isModelLoaded`` is `true` and ``dimensions``
+    /// becomes meaningful.
+    ///
+    /// - Parameter url: Location of the model to load. NOTE(review): the
+    ///   accepted file format / URL scheme is backend-defined and not specified
+    ///   by this protocol.
+    /// - Throws: A backend-defined error when the model cannot be loaded.
     func loadModel(from url: URL) async throws
+
+    /// Produces one embedding vector per input text.
+    ///
+    /// - Parameter texts: The texts to embed.
+    /// - Returns: One `[Float]` vector per element of `texts`.
+    /// - Postcondition: the returned array has exactly one vector per input
+    ///   (`result.count == texts.count`), each of length ``dimensions``. A
+    ///   backend that cannot satisfy this signals the violation by throwing
+    ///   ``EmbeddingError/dimensionMismatch(expected:actual:)``.
+    ///
+    /// NOTE(review): presumably throws ``EmbeddingError/modelNotLoaded`` when
+    /// called before a model is loaded — confirm against the conformers.
     func embed(_ texts: [String]) async throws -> [[Float]]
+
+    /// Unloads the current model.
+    ///
+    /// NOTE(review): presumably releases the model and resets
+    /// ``isModelLoaded`` to `false`; behavior when no model is loaded is not
+    /// specified by this protocol — confirm.
     func unloadModel()
 }
 
+/// Default implementations, so conformers written before a requirement existed
+/// need not implement it.
+public extension EmbeddingBackend {
+    /// Falls back to ``EmbeddingCapabilities/default`` when a conformer does not
+    /// declare capabilities of its own.
+    var capabilities: EmbeddingCapabilities {
+        EmbeddingCapabilities.default
+    }
+}
+
 public enum EmbeddingError: LocalizedError {
     case modelNotLoaded
     case dimensionMismatch(expected: Int, actual: Int)

@@ -88,6 +88,20 @@ public struct LlamaMirostatV2SamplerOptions: Sendable, Codable, Equatable {
 }
 
 /// Sampling and generation parameters shared across all inference backends.
+///
+/// ## Throw vs. silently ignore
+///
+/// Fields fall into two contractual classes, and backends honour them differently:
+///
+/// - **Capability-gated guarantees** (e.g. ``GenerationConfig/grammar``) carry a
+///   *guarantee*: a backend that cannot honour the request MUST throw the matching
+///   ``InferenceError`` (e.g. ``InferenceError/unsupportedGrammar``) rather than
+///   silently degrade. Callers rely on the error to know the constraint was not applied.
+/// - **Advisory hints** (e.g. ``seed``, ``minP``, ``jsonMode``, the sampler penalties,
+///   and the vendor knobs below) are best-effort: backends that do not support a hint
+///   silently ignore it. A missing hint is never an error.
+///
+/// See each field's own documentation for the per-backend specifics.
 public struct GenerationConfig: Sendable, Codable {
     public var temperature: Float
     public var topP: Float
@@ -149,6 +163,11 @@ public struct GenerationConfig: Sendable, Codable {
     /// ``presenceContextSize`` for why llama.cpp ignores it.
     public var frequencyContextSize: Int?
 
+    // The three `llama*` fields below are backend-specific knobs living on the
+    // shared GenerationConfig type. This is accepted tech debt: relocating them
+    // into the companion llama package is deferred (cross-repo blast radius) and
+    // tracked in #1834.
+
     /// llama.cpp DRY repetition sampler options.
     ///
     /// `nil` (the default) preserves the backend's existing sampler chain. When

@@ -75,4 +75,24 @@ final class EmbeddingBackendProtocolTests: XCTestCase {
         XCTAssertNotNil(desc)
         XCTAssertFalse(desc!.isEmpty)
     }
+
+    // MARK: - EmbeddingCapabilities
+
+    /// Conformers that leave `capabilities` unimplemented pick up the value
+    /// supplied by the protocol extension, so adding the requirement is
+    /// non-breaking.
+    func test_capabilities_defaultsToDefault_whenNotOverridden() {
+        let inherited = MockEmbeddingBackend().capabilities
+        XCTAssertEqual(inherited, EmbeddingCapabilities.default)
+    }
+
+    /// Encoding a fully-populated value to JSON and decoding it back yields a
+    /// value equal to the one that was encoded.
+    func test_embeddingCapabilities_codableRoundTrips() throws {
+        let expected = EmbeddingCapabilities(maxBatchSize: 32, maxInputLength: 512, producesNormalizedVectors: true)
+
+        let payload = try JSONEncoder().encode(expected)
+        let actual = try JSONDecoder().decode(EmbeddingCapabilities.self, from: payload)
+
+        XCTAssertEqual(actual, expected)
+    }
 }