# Source: EverOS/methods/EverCore/env.template (Fearvox/EverOS, main branch)
# =====================================================
# Memory System Configuration Template
# =====================================================
#
# SECURITY NOTICE:
# - Copy this file to .env and fill in your actual API keys
# - Never commit .env file to version control
# - Keep your API keys secure and private
#
# SETUP INSTRUCTIONS:
# 1. cp env.template .env
# 2. Edit .env with your actual values
# 3. The system will automatically load these values
# =====================================================


# ===================
# LLM Configuration
# ===================

# LLM_PROVIDER controls the default provider (used when scene config omits provider)
LLM_PROVIDER=openrouter
# Default model identifier. The value below uses the "vendor/model" form, which
# matches LLM_PROVIDER=openrouter; change both together when switching providers.
LLM_MODEL=x-ai/grok-4-fast
# Sampling temperature and token limit applied to LLM calls.
# NOTE(review): whether LLM_MAX_TOKENS caps only the completion or the whole
# request is not stated in this file - confirm against the LLM client code.
LLM_TEMPERATURE=0.3
LLM_MAX_TOKENS=32768
# Default provider fallback (legacy)
# The key below is a placeholder; put the real key in .env only.
LLM_API_KEY=sk-or-v1-xxxx
LLM_BASE_URL=https://openrouter.ai/api/v1

# OpenRouter Configuration
# Preferred provider naming rule:
#   {PROVIDER}_API_KEY / {PROVIDER}_BASE_URL
# Supports multiple keys (comma-separated) for rate-limit distribution:
#   OPENROUTER_API_KEY=key1,key2,key3
OPENROUTER_API_KEY=your-openrouter-api-key
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
# Also supported:
#   {PROVIDER}_LLM_API_KEY / {PROVIDER}_LLM_BASE_URL
#
# Optional OpenRouter upstream-provider setting. Default is 'default';
# 'cerebras' is recommended for qwen3 models. Uncomment to enable.
# LLM_OPENROUTER_PROVIDER=cerebras

# OpenAI Direct Access Configuration
# Preferred (placeholder key - replace in .env):
OPENAI_API_KEY=sk-xxxx
OPENAI_BASE_URL=https://api.openai.com/v1
# Also supported:
# OPENAI_LLM_API_KEY=sk-xxxx
# OPENAI_LLM_BASE_URL=https://api.openai.com/v1

# ===================
# Provider Model Whitelist (Optional)
# ===================
# Format: {PROVIDER}_WHITE_LIST=model1,model2,model3
# If not set or empty, all models are allowed for that provider.
# If set, only listed models can be used.
# OPENROUTER_WHITE_LIST=qwen/qwen3-235b-a22b-2507,openai/gpt-4.1-mini
# OPENAI_WHITE_LIST=gpt-5-mini,gpt-4.1-mini

# ===================
# Vectorize (Embedding) Service Configuration
# ===================

# ---- Primary Vectorize Provider ----
# Provider type: vllm (self-deployed), deepinfra (commercial API)
VECTORIZE_PROVIDER=vllm

# API key for primary provider (use "EMPTY" if not required for vllm service)
VECTORIZE_API_KEY=EMPTY

# Base URL for primary provider
# vLLM service example: http://localhost:8000/v1
# DeepInfra example: https://api.deepinfra.com/v1/openai
VECTORIZE_BASE_URL=http://localhost:8000/v1

# Model name (shared by both primary and fallback providers)
# Because the name is shared, the fallback provider must serve the same model.
VECTORIZE_MODEL=Qwen/Qwen3-Embedding-4B

# ---- Fallback Vectorize Provider (Optional) ----
# Fallback provider type: vllm, deepinfra, or none (to disable fallback)
# Note: Fallback will be disabled if provider is "none", base_url is empty,
#       or api_key is empty (for deepinfra provider)
# The template ships with the fallback configured but with a placeholder key;
# either supply a real key or set the provider to "none".
VECTORIZE_FALLBACK_PROVIDER=deepinfra

# API key for fallback provider (required for deepinfra, optional for vllm)
VECTORIZE_FALLBACK_API_KEY=xxxxx

# Base URL for fallback provider (required if fallback is enabled)
VECTORIZE_FALLBACK_BASE_URL=https://api.deepinfra.com/v1/openai

# ===== Common Settings =====
# Request tuning for embedding calls.
# NOTE(review): VECTORIZE_TIMEOUT is presumably in seconds, and whether these
# settings also apply to the fallback provider is not stated here - confirm
# against the client code.
VECTORIZE_TIMEOUT=30
VECTORIZE_MAX_RETRIES=3
VECTORIZE_BATCH_SIZE=10
VECTORIZE_MAX_CONCURRENT=5
VECTORIZE_ENCODING_FORMAT=float

# Vector dimensions for client-side truncation
# Set to 0 to disable truncation and use full model dimensions
# Qwen3-Embedding-4B: full 2560D (DeepInfra) or 3584D (vLLM), recommend truncate to 1024D
# NOTE(review): the Qwen3-Embedding-4B model card lists a 2560-dimension output;
# confirm the 3584D figure quoted for vLLM before relying on it.
# Note: Always uses client-side truncation with L2 re-normalization
# NOTE(review): this value presumably has to match the dimension of the vector
# index that stores the embeddings - confirm before changing it on existing data.
VECTORIZE_DIMENSIONS=1024


# ===================
# Rerank Service Configuration
# ===================

# ---- Primary Rerank Provider ----
# Provider type: vllm (self-deployed), deepinfra (commercial API)
RERANK_PROVIDER=vllm

# API key for primary provider (use "EMPTY" if not required for vllm service)
RERANK_API_KEY=EMPTY

# Base URL for primary provider
# vLLM service example: http://localhost:12000/v1/rerank
# DeepInfra example: https://api.deepinfra.com/v1/inference
# Unlike the vectorize base URL, the vLLM example here includes the full
# /v1/rerank endpoint path.
RERANK_BASE_URL=http://localhost:12000/v1/rerank

# Model name (shared by both primary and fallback providers)
# Because the name is shared, the fallback provider must serve the same model.
RERANK_MODEL=Qwen/Qwen3-Reranker-4B

# ---- Fallback Rerank Provider (Optional) ----
# Fallback provider type: vllm, deepinfra, or none (to disable fallback)
# Note: Fallback will be disabled if provider is "none", base_url is empty,
#       or api_key is empty (for deepinfra provider)
# The template ships with the fallback configured but with a placeholder key;
# either supply a real key or set the provider to "none".
RERANK_FALLBACK_PROVIDER=deepinfra

# API key for fallback provider (required for deepinfra, optional for vllm)
RERANK_FALLBACK_API_KEY=xxxxx

# Base URL for fallback provider (required if fallback is enabled)
RERANK_FALLBACK_BASE_URL=https://api.deepinfra.com/v1/inference

# ===== Common Settings =====
# Request tuning for rerank calls.
# NOTE(review): RERANK_TIMEOUT is presumably in seconds - confirm against the
# client code.
RERANK_TIMEOUT=30
RERANK_MAX_RETRIES=3
RERANK_BATCH_SIZE=10
RERANK_MAX_CONCURRENT=5


# ===================
# Tenant Configuration
# ===================

# Single-tenant mode: set a tenant ID for local development.
# All storage resources will be prefixed with this value (e.g., t_zhang_memsys).
# Use t_{yourname} to avoid conflicts with other developers on shared infrastructure.
# For multi-tenant deployment, leave this unset — tenant context comes from request headers.
# The value below is a placeholder: replace "yourname" after copying to .env,
# or remove/comment out the line for a multi-tenant deployment.
TENANT_SINGLE_TENANT_ID=t_yourname

# ===================
# Redis Configuration
# ===================

# Redis server address.
REDIS_HOST=localhost
REDIS_PORT=6379
# Logical Redis database index. A stock Redis configuration provides
# databases 0-15, so 8 is valid by default.
REDIS_DB=8
# NOTE(review): presumably switches the client connection to TLS - confirm
# against the Redis client setup code.
REDIS_SSL=false

# ===================
# MongoDB Configuration
# ===================

# MongoDB server address.
MONGODB_HOST=localhost
MONGODB_PORT=27017
# Development defaults only: use your own credentials in .env for any
# non-local deployment.
MONGODB_USERNAME=admin
MONGODB_PASSWORD=memsys123
# Connection-string options in query-string form (key=value pairs joined by &).
# socketTimeoutMS is in milliseconds; authSource names the database used to
# authenticate the user.
# NOTE(review): presumably appended verbatim to the generated MongoDB URI -
# confirm against the connection code.
MONGODB_URI_PARAMS=socketTimeoutMS=15000&authSource=admin
# Optional database name override; uncomment to set it explicitly.
# NOTE(review): the default used when this is unset is not shown here.
# MONGODB_DATABASE=memsys

# ===================
# Elasticsearch Configuration
# ===================

# Elasticsearch endpoint, including scheme and port. The port here is 19200,
# not Elasticsearch's stock 9200.
# NOTE(review): the plural name suggests several hosts may be listed; the
# separator is not documented here - confirm against the ES client setup.
ES_HOSTS=http://localhost:19200
# Left empty in the template; fill in when the cluster has security enabled.
ES_USERNAME=
ES_PASSWORD=
# NOTE(review): presumably disables TLS certificate verification - acceptable
# for local development, but should be true for any https:// production host.
ES_VERIFY_CERTS=false

# ===================
# Milvus Configuration
# ===================

# Milvus server address; 19530 is Milvus's standard client (gRPC) port.
MILVUS_HOST=localhost
MILVUS_PORT=19530


# ===================
# API Server Configuration
# ===================

# V3 API Base URL (for chat_with_memory.py and other clients)
# This is read by clients to locate the server.
# NOTE(review): whether the server itself binds its port from this value is
# not shown here.
API_BASE_URL=http://localhost:1995

# ===================
# Retrieve Configuration
# ===================

# Maximum return count when in unlimited mode (top_k=-1)
TOPK_LIMIT=100

# Recall multiplier: when top_k > 0, actual recall = top_k * RECALL_MULTIPLIER
# Example: top_k=10 with RECALL_MULTIPLIER=2 recalls 20 candidates.
RECALL_MULTIPLIER=2

# Milvus COSINE similarity threshold (0-1), applied in unlimited mode
# NOTE(review): presumably results scoring below the threshold are dropped -
# confirm whether the comparison is inclusive.
MILVUS_SIMILARITY_THRESHOLD=0.6

# Rerank score threshold (0-1), applied in unlimited mode for quality filtering
RERANK_SCORE_THRESHOLD=0.6

# ===================
# Agentic Retrieval Configuration
# ===================

# Number of top results after reranking used for LLM sufficiency judgment
# (the "ROUND1" in the name indicates this applies to the first retrieval round).
AGENTIC_ROUND1_RERANK_TOP_N=10

# ===================
# Agent Memorize Mode
# ===================

# Controls which MemorizeConfig is used for agent conversations.
# Allowed values:
# - online: full pipeline, fast skill search (default)
# - fast_skill: skip profile/foresight/eventlog, skip maturity scoring
# NOTE(review): behaviour for any other value is not documented here.
AGENT_MEMORIZE_MODE=online

# ===================
# Environment & Logging Configuration
# ===================

# Log verbosity.
# NOTE(review): presumably a standard Python logging level name
# (DEBUG, INFO, WARNING, ERROR, CRITICAL) - confirm against the logging setup.
LOG_LEVEL=INFO
# Deployment environment name; the template defaults to development.
ENV=dev
# Python enables asyncio debug mode when this variable is set to ANY non-empty
# string, so setting it to 0 does not turn it off - remove or comment out the
# line instead. Debug mode adds runtime overhead (slow-callback logging, extra
# checks); keep it for development only.
# NOTE(review): only effective if it is in the process environment before the
# event loop is created - confirm the .env loader runs early enough.
PYTHONASYNCIODEBUG=1
# Language code for the memory system.
# NOTE(review): the set of supported codes is not listed here.
MEMORY_LANGUAGE=en