EverOS/src/everos/config/settings.py at main · EverMind-AI/EverOS · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
"""Application settings.

Loaded by :func:`load_settings`. Source priority (later wins):

    1. ``config/default.toml`` (shipped values; lowest priority)
    2. ``~/.everos/config.toml`` (user-level overrides; optional)
    3. ``.env`` file in the working directory (secrets / machine-specific)
    4. ``EVEROS_<SECTION>__<KEY>`` environment variables
    5. Init args passed programmatically (highest priority)

The user-level toml path defaults to ``~/.everos/config.toml``. Override
with the ``EVEROS_CONFIG_FILE`` environment variable. The file is
optional — if it does not exist, the source is silently skipped.

The settings tree mirrors the TOML structure: ``settings.sqlite.busy_timeout_ms``
maps to ``[sqlite].busy_timeout_ms`` and to ``EVEROS_SQLITE__BUSY_TIMEOUT_MS``.

``load_settings`` is ``functools.cache``-d so callers in hot paths (e.g.
:mod:`everos.component.utils.datetime`) don't re-parse the TOML on every
call. Tests that mutate environment variables must call
``load_settings.cache_clear()`` after the mutation to invalidate.
"""

from __future__ import annotations

import os
from functools import cache
from pathlib import Path
from typing import Literal
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

from pydantic import BaseModel, Field, SecretStr, field_validator
from pydantic_settings import (
    BaseSettings,
    PydanticBaseSettingsSource,
    SettingsConfigDict,
    TomlConfigSettingsSource,
)

# Shipped defaults, located next to this module; passed to ``Settings`` as
# its ``toml_file``.
_DEFAULT_TOML_PATH = Path(__file__).parent / "default.toml"
# Name of the environment variable that overrides the user-level TOML path.
_USER_TOML_ENV_VAR = "EVEROS_CONFIG_FILE"
# User-level TOML path used when the override variable is unset or empty.
# ``~`` is expanded once, when this module is imported.
_DEFAULT_USER_TOML_PATH = Path("~/.everos/config.toml").expanduser()


def _resolve_user_toml_path() -> Path:
    """Return the location of the user-level ``config.toml``.

    When the ``EVEROS_CONFIG_FILE`` environment variable holds a non-empty
    value, that value is used as the path (with a leading ``~`` expanded).
    Otherwise the default ``~/.everos/config.toml`` is returned.
    """
    configured = os.environ.get(_USER_TOML_ENV_VAR)
    if not configured:
        # Unset and empty are treated the same: fall back to the default.
        return _DEFAULT_USER_TOML_PATH
    return Path(configured).expanduser()


class MemorySettings(BaseModel):
    """memory-root configuration."""

    # Stored exactly as given; the ``~`` in the default is not expanded by
    # this model.
    root: Path = Path("~/.everos")
    timezone: str = "UTC"
    """Effective timezone for date buckets and timestamps.

    Default ``"UTC"``. Override via ``[memory] timezone = "..."`` in
    TOML or ``EVEROS_MEMORY__TIMEZONE`` env var. Validated against
    :class:`zoneinfo.ZoneInfo` at load time, so an invalid name fails
    fast (no silent fallback). This is the **sole** source of truth for
    the project's effective timezone — the OS ``TZ`` env var is *not*
    consulted, keeping the configuration deterministic.
    """

    @field_validator("timezone")
    @classmethod
    def _validate_timezone(cls, v: str) -> str:
        """Reject timezone names that :class:`zoneinfo.ZoneInfo` cannot load.

        Returns:
            ``v`` unchanged when ``ZoneInfo(v)`` succeeds.

        Raises:
            ValueError: when ``ZoneInfo(v)`` fails; the original exception
                is chained as the cause.
        """
        try:
            ZoneInfo(v)
        except (ZoneInfoNotFoundError, ValueError, OSError) as exc:
            # OSError covers filesystem-level failures such as
            # IsADirectoryError, which some Python versions raise for
            # directory-like keys (e.g. "America"). Converting it to
            # ValueError keeps the failure a regular validation error
            # instead of a raw exception escaping settings load.
            raise ValueError(f"invalid timezone: {v!r}") from exc
        return v


class ApiSettings(BaseModel):
    """Bind address and port of the HTTP API server.

    The default ``host = "127.0.0.1"`` restricts the server to loopback.
    This follows the threat model in ``SECURITY.md``: EverOS ships **no
    built-in authentication**, so binding to a routable interface
    (``0.0.0.0`` etc.) without your own gateway / auth layer in front
    is unsupported.

    Env binding:
        EVEROS_API__HOST
        EVEROS_API__PORT
    """

    host: str = Field(default="127.0.0.1")
    # Restricted to the valid TCP port range.
    port: int = Field(8000, ge=1, le=65535)


class SqliteSettings(BaseModel):
    """SQLite tunables.

    Every field here is applied as a PRAGMA on each new connection.
    """

    journal_mode: Literal[
        "WAL",
        "DELETE",
        "MEMORY",
        "OFF",
        "TRUNCATE",
        "PERSIST",
    ] = "WAL"
    synchronous: Literal["FULL", "NORMAL", "OFF", "EXTRA"] = Field(default="NORMAL")
    foreign_keys: bool = Field(default=True)
    temp_store: Literal["DEFAULT", "FILE", "MEMORY"] = Field(default="MEMORY")
    busy_timeout_ms: int = Field(5000, ge=0)
    # 64 MiB
    journal_size_limit_bytes: int = Field(64 * 1024 * 1024, ge=0)
    cache_size_kb: int = Field(2048, ge=0)


class LLMSettings(BaseModel):
    """Configuration of the LLM client.

    The service layer reads this section when it lazily constructs the
    LLM client handed to algo extractors. Field names are provider-
    agnostic: the project follows the OpenAI API protocol, so any
    OpenAI-compatible endpoint plugs in via ``base_url``.

    Env binding (via parent ``Settings``):
        EVEROS_LLM__MODEL
        EVEROS_LLM__API_KEY
        EVEROS_LLM__BASE_URL
    """

    model: str = Field(default="gpt-4o-mini")
    api_key: SecretStr | None = Field(default=None)
    base_url: str | None = Field(default=None)


class MultimodalSettings(BaseModel):
    """Multimodal parsing LLM config (everalgo-parser).

    Flat section mirroring ``[llm]``. The model must accept multimodal
    ``image_url`` parts (image / pdf / audio); it is kept independent from
    the main ``[llm]`` so parsing can target a vision/audio-capable
    endpoint without affecting boundary / extraction.

    Numeric knobs are range-checked at load time, in line with the
    embedding / rerank sections, so a zero or negative value is rejected
    when settings are loaded rather than surfacing later at use time.

    Env binding (via parent ``Settings``):
        EVEROS_MULTIMODAL__MODEL
        EVEROS_MULTIMODAL__API_KEY
        EVEROS_MULTIMODAL__BASE_URL
        EVEROS_MULTIMODAL__MAX_CONCURRENCY
        EVEROS_MULTIMODAL__FILE_URI_ALLOW_DIRS
        EVEROS_MULTIMODAL__FILE_URI_MAX_BYTES
    """

    model: str = "google/gemini-3-flash-preview"
    api_key: SecretStr | None = None
    base_url: str | None = None
    # At least one slot is required; 0 or a negative count is rejected.
    max_concurrency: int = Field(default=4, ge=1)

    # ``file://`` content-item support (read locally by EverOS, not everalgo).
    file_uri_allow_dirs: list[str] = Field(default_factory=list)
    """Allowlisted base dirs for ``file://`` uris. Empty = allow any readable
    file (local-first default); set to confine reads when the API is exposed."""
    # A byte limit cannot be negative.
    file_uri_max_bytes: int = Field(default=50 * 1024 * 1024, ge=0)
    """Max size (bytes) of a ``file://`` asset; larger files are rejected."""


class EmbeddingSettings(BaseModel):
    """Configuration of the embedding client.

    Targets an OpenAI-compatible embedding endpoint. When the embedding
    capability is enabled, ``model`` / ``api_key`` / ``base_url`` are
    required at runtime; the remaining runtime knobs (``timeout`` etc.)
    come with sensible defaults.

    Env binding:
        EVEROS_EMBEDDING__MODEL
        EVEROS_EMBEDDING__API_KEY
        EVEROS_EMBEDDING__BASE_URL
        EVEROS_EMBEDDING__TIMEOUT_SECONDS
        EVEROS_EMBEDDING__MAX_RETRIES
        EVEROS_EMBEDDING__BATCH_SIZE
        EVEROS_EMBEDDING__MAX_CONCURRENT
    """

    # Endpoint identity: optional at load time.
    model: str | None = Field(default=None)
    api_key: SecretStr | None = Field(default=None)
    base_url: str | None = Field(default=None)

    # Runtime knobs, range-checked at load time.
    timeout_seconds: float = Field(30.0, gt=0)
    max_retries: int = Field(3, ge=0)
    batch_size: int = Field(10, ge=1)
    max_concurrent: int = Field(5, ge=1)


class RerankSettings(BaseModel):
    """Configuration of the rerank client.

    LLM and embedding endpoints share a single OpenAI-compatible shape;
    rerank API schemas do not, they differ between providers:

    - DeepInfra uses ``POST {base_url}/{model}`` with a custom body.
    - vLLM uses ``POST {base_url}/rerank`` with
      ``{model, query, documents}``.
    - DashScope (Aliyun Bailian) ``gte-rerank-v2`` uses
      ``POST {base_url}/api/v1/services/rerank/text-rerank/text-rerank``
      with a nested ``{model, input, parameters}`` body.

    ``provider`` picks which client implementation the factory builds.
    It defaults to ``None`` — the factory then infers it from the
    ``base_url`` host (e.g. ``dashscope.aliyuncs.com`` → DashScope,
    ``*.deepinfra.com`` → DeepInfra), falling back to ``"deepinfra"`` when
    the host is unrecognized. Set ``provider`` explicitly to override the
    inference (required for self-hosted ``vllm`` on an arbitrary host).

    Env binding:
        EVEROS_RERANK__PROVIDER
        EVEROS_RERANK__MODEL
        EVEROS_RERANK__API_KEY
        EVEROS_RERANK__BASE_URL
        EVEROS_RERANK__TIMEOUT_SECONDS
        EVEROS_RERANK__MAX_RETRIES
        EVEROS_RERANK__BATCH_SIZE
        EVEROS_RERANK__MAX_CONCURRENT
    """

    provider: Literal["deepinfra", "vllm", "dashscope"] | None = Field(default=None)

    # Endpoint identity: optional at load time.
    model: str | None = Field(default=None)
    api_key: SecretStr | None = Field(default=None)
    base_url: str | None = Field(default=None)

    # Runtime knobs, range-checked at load time.
    timeout_seconds: float = Field(30.0, gt=0)
    max_retries: int = Field(3, ge=0)
    batch_size: int = Field(10, ge=1)
    max_concurrent: int = Field(5, ge=1)


class BoundaryDetectionSettings(BaseModel):
    """Hard limits for boundary detection.

    Both values are passed through to the ``everalgo`` BoundaryDetector.
    """

    hard_token_limit: int = Field(65536, ge=1)
    hard_msg_limit: int = Field(500, ge=1)


class MemorizeSettings(BaseModel):
    """Configuration of the memorize use case.

    ``mode`` decides which boundary detector runs and which pipelines are
    dispatched. One service process serves a single mode at a time, so
    switching modes requires a restart.

        - ``"chat"``  -> ``everalgo.user_memory.BoundaryDetector`` and only the
          user-memory pipeline runs.
        - ``"agent"`` -> ``everalgo.agent_memory.AgentBoundaryDetector`` and
          both user-memory + agent-memory pipelines run.

    ``session_lock_timeout_seconds`` is the upper bound on how long a
    single ``memorize()`` invocation may hold the per-session lock. The
    window covers the boundary LLM call + memcell DB writes + (synchronous
    portion of) pipeline dispatch. It prevents a stuck LLM from
    deadlocking subsequent concurrent calls on the same session_id: on
    timeout the outer ``asyncio.timeout`` cancels the task and the lock
    auto-releases.

    Env binding:
        EVEROS_MEMORIZE__MODE
        EVEROS_MEMORIZE__SESSION_LOCK_TIMEOUT_SECONDS
    """

    mode: Literal["chat", "agent"] = Field(default="agent")
    session_lock_timeout_seconds: float = Field(360.0, gt=0)


class SearchSettings(BaseModel):
    """Policy knobs for the search pipeline.

    ``vector_strategy`` chooses the read path used by
    ``SearchMethod.VECTOR``:

    - ``"maxsim_atomic"`` (default) — ANN over ``atomic_fact.vector``
      (recall pool ``top_k * 20``, capped at 2000), max-pool the per-fact
      cosine by parent memcell, then reverse-resolve the top memcells back
      to episode rows. This is MaxSim over atomic facts; it trades one
      extra LanceDB scan for finer-grained semantic match on long
      episodes.
    - ``"episode"`` — single-vector ANN over ``episode.vector`` (one vector
      per episode = the embedded Content section). This is the legacy
      path, kept so deployments can opt out via env.

    Env binding:
        EVEROS_SEARCH__VECTOR_STRATEGY={episode,maxsim_atomic}
    """

    vector_strategy: Literal["episode", "maxsim_atomic"] = Field(
        default="maxsim_atomic"
    )


class LanceDBSettings(BaseModel):
    """LanceDB tunables.

    Both fields are range-checked at load time: negative values are
    rejected with a validation error.

    ``read_consistency_seconds``:
      ``None`` (omitted) → no consistency check (highest performance).
      ``0``              → strict consistency (every read).
      ``>0``             → eventual (interval between checks).

    ``index_cache_size_bytes``:
      Upper bound on LanceDB's global *index* cache (``GlobalIndexCache``
      in lance crate). Each cached entry is one opened FTS / vector /
      scalar index reader and **holds the file descriptors of its on-disk
      ``_indices/<uuid>/...`` files**.

      LanceDB's own default is ``None`` (unbounded), which on a long-
      running daemon means every new index UUID created by an
      ``optimize()`` call adds a fresh reader to the cache, and its
      FDs are never released — they leak monotonically until
      ``EMFILE`` (os error 24). Verified locally: 30 optimize cycles
      take FD usage from 0 to ~960 against macOS's default ``ulimit -n``
      of 256 / Linux's 1024.

      Setting a byte cap turns the cache into a real LRU: when it
      exceeds the cap, the oldest readers are dropped, Rust ``Drop``
      runs ``close(fd)``, and the FD pressure resolves itself.

      Cap → steady-state FD upper bound (measured under 30 add+optimize
      cycles with the real ``Episode`` schema and 100-query stress):

      ===========  =================  ===================
      cap          FD upper bound     query latency (100q)
      ===========  =================  ===================
      ``2 MB``     ~45                ~5 ms
      ``4 MB``     ~52                ~3 ms
      ``8 MB``     ~140               ~2.4 ms
      ``16 MB``    ~290               ~2.3 ms   ← default
      ``32 MB``    ~630               ~1.4 ms
      ``unbound``  >960 (leaks)       ~1.3 ms
      ===========  =================  ===================

      EverOS's measured steady-state working set after a 12 h
      ``rebuild_indexes`` cycle is ~50-100 readers / 3-6 MB resident
      (5 tables × ~7 BM25 columns × ~10 part_N entries each), so
      ``16 MB`` gives ~3× headroom for burst traffic and stale-but-not-
      yet-evicted readers, while the FD ceiling (~290) stays well below
      common ulimits (macOS default 256 needs ``ulimit -n 1024`` first;
      Linux default 1024 is fine out of the box).

      Override via ``EVEROS_LANCEDB__INDEX_CACHE_SIZE_BYTES`` if your
      working set is much larger (heavier table count or much wider
      indexes) or if you hit a tighter ``ulimit -n`` (containers / dev
      boxes).

      Note: the *metadata* cache (``metadata_cache_size_bytes``) is
      **not** exposed — experiment showed it caches in-memory parsed
      manifests / fragment stats with zero impact on FD count; leaving
      it unbounded (lancedb default) is fine.
    """

    # Valid domain per the table above: None, 0, or a positive interval.
    read_consistency_seconds: float | None = Field(default=None, ge=0)
    # A byte cap cannot be negative; default is 16 MiB.
    index_cache_size_bytes: int = Field(default=16 * 1024 * 1024, ge=0)


class Settings(BaseSettings):
    """Root of the application settings tree.

    Each attribute is one configuration section; values are read from the
    sources returned by :meth:`settings_customise_sources`.
    """

    memory: MemorySettings = MemorySettings()
    api: ApiSettings = ApiSettings()
    sqlite: SqliteSettings = SqliteSettings()
    lancedb: LanceDBSettings = LanceDBSettings()
    llm: LLMSettings = LLMSettings()
    embedding: EmbeddingSettings = EmbeddingSettings()
    rerank: RerankSettings = RerankSettings()
    boundary_detection: BoundaryDetectionSettings = BoundaryDetectionSettings()
    memorize: MemorizeSettings = MemorizeSettings()
    search: SearchSettings = SearchSettings()
    multimodal: MultimodalSettings = MultimodalSettings()

    model_config = SettingsConfigDict(
        # Environment variables: EVEROS_<SECTION>__<KEY>.
        env_prefix="EVEROS_",
        env_nested_delimiter="__",
        # Dotenv file, resolved relative to the working directory.
        env_file=".env",
        env_file_encoding="utf-8",
        # Shipped defaults.
        toml_file=_DEFAULT_TOML_PATH,
        extra="ignore",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Insert the TOML sources after env / dotenv, before the secret store.

        Resulting order (earlier wins in pydantic-settings):
            init_args > env > .env > user_toml > default_toml > secrets

        The user-level toml (default ``~/.everos/config.toml``) is added
        only when that file exists; a missing file contributes no source.
        """
        toml_layers: list[PydanticBaseSettingsSource] = []

        user_file = _resolve_user_toml_path()
        if user_file.is_file():
            toml_layers.append(
                TomlConfigSettingsSource(settings_cls, toml_file=user_file)
            )
        # No explicit path: this source reads ``toml_file`` from model_config.
        toml_layers.append(TomlConfigSettingsSource(settings_cls))

        return (
            init_settings,
            env_settings,
            dotenv_settings,
            *toml_layers,
            file_secret_settings,
        )


@cache
def load_settings() -> Settings:
    """Return the shared :class:`Settings` instance (cached).

    The first call constructs ``Settings()``, which reads every source
    registered by ``Settings.settings_customise_sources`` (environment
    variables, ``.env``, the optional user-level TOML, the shipped
    ``default.toml``). ``functools.cache`` then memoizes the result, so
    every later caller sees the same instance until something explicitly
    clears the cache (``load_settings.cache_clear()``).

    Tests that monkeypatch environment variables must call
    ``cache_clear`` after each mutation to pick the new env up.

    Returns:
        The memoized ``Settings`` object.
    """
    return Settings()