diff --git a/.env.example b/.env.example index 1c443d0d..71266f0c 100644 --- a/.env.example +++ b/.env.example @@ -1,385 +1,86 @@ # ============================================================================= -# PowerMem Configuration Template -# ============================================================================= -# Copy this file to .env and modify the values according to your needs -# -# Required Configuration: Database, LLM, Embedding -# Optional Configuration: Agent, Intelligent Memory, Performance, Security, etc. -# ============================================================================= - -# For a complete list of timezones, see: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones -TIMEZONE=Asia/Shanghai - -# ============================================================================= -# 1. Database Configuration (Required) -# ============================================================================= -# Choose your database provider: sqlite, oceanbase, postgres -DATABASE_PROVIDER=oceanbase - -# ----------------------------------------------------------------------------- -# SQLite Configuration (Default - Recommended for development) -# ----------------------------------------------------------------------------- -SQLITE_PATH=./data/powermem_dev.db -SQLITE_ENABLE_WAL=true -SQLITE_TIMEOUT=30 -SQLITE_COLLECTION=memories - -# ----------------------------------------------------------------------------- -# OceanBase Configuration -# ----------------------------------------------------------------------------- -# Connection mode: set OCEANBASE_HOST for remote, leave empty for embedded SeekDB -OCEANBASE_HOST= -OCEANBASE_PATH=./seekdb_data -OCEANBASE_PORT=2881 -OCEANBASE_USER=root@sys -OCEANBASE_PASSWORD=your_password -OCEANBASE_DATABASE=powermem -OCEANBASE_COLLECTION=memories - -## Keep the default settings, as modifications are generally not needed. 
-OCEANBASE_INDEX_TYPE=HNSW -OCEANBASE_VECTOR_METRIC_TYPE=cosine -OCEANBASE_TEXT_FIELD=document -OCEANBASE_VECTOR_FIELD=embedding -OCEANBASE_EMBEDDING_MODEL_DIMS=1536 -OCEANBASE_PRIMARY_FIELD=id -OCEANBASE_METADATA_FIELD=metadata -OCEANBASE_VIDX_NAME=memories_vidx - -# ----------------------------------------------------------------------------- -# PostgreSQL Configuration -# ----------------------------------------------------------------------------- -POSTGRES_HOST=127.0.0.1 -POSTGRES_PORT=5432 -POSTGRES_USER=postgres -POSTGRES_PASSWORD=your_password -POSTGRES_DATABASE=powermem -POSTGRES_COLLECTION=memories - -## Keep the default settings, as modifications are generally not needed. -POSTGRES_EMBEDDING_MODEL_DIMS=1536 -POSTGRES_DISKANN=true -POSTGRES_HNSW=true -# DATABASE_SSLMODE=prefer -# DATABASE_POOL_SIZE=10 -# DATABASE_MAX_OVERFLOW=20 - - -# ============================================================================= -# 2. LLM Configuration (Required) -# ============================================================================= -# Choose your LLM provider: qwen, openai, siliconflow, ollama, vllm, anthropic, deepseek -LLM_PROVIDER=qwen - +# PowerMem — minimal configuration +# ============================================================================= +# These are the ONLY variables you need to start PowerMem. Everything else has +# a safe default, so the system runs out of the box: +# +# - Database -> OceanBase provider with no host configured, which boots +# embedded seekdb on disk at ./seekdb_data (same engine, +# same SQL surface, no separate server). Set OCEANBASE_HOST +# in your `.env` (see `.env.example.full`) to point at a remote cluster. 
+# - Embedder -> built-in local all-MiniLM-L6-v2 (no API key required; +# model auto-downloads to ~/.cache on first use) +# - Reranker / graph store / telemetry / audit -> sensible defaults / off +# +# Want higher performance, or does your environment already have stronger +# infrastructure to plug into — a remote OceanBase cluster, a hosted embedding +# model, a rerank model, etc.? Don't replace this file; just open +# `.env.example.full`, copy the specific config block(s) you need, and paste +# them into your `.env` alongside the keys above. +# +# `.env.example.full` documents every available knob, grouped by component, so +# you can pick exactly what you want and leave the rest at safe defaults. +# ============================================================================= + +# ----------------------------------------------------------------------------- +# LLM (required) — the model PowerMem calls to extract facts from raw input, +# decide what to add/update/skip in memory, and synthesize answers at search +# time. Quality and cost here directly shape the quality of every memory you +# store and every result you retrieve. +# ----------------------------------------------------------------------------- +# +# LLM_PROVIDER — which gateway PowerMem talks to. +# Recommended: openai +# The `openai` provider speaks the OpenAI chat-completions protocol, now +# the de-facto standard. ANY OpenAI-compatible model can run through it — +# Qwen (including the Qwen "coding plan"), DeepSeek, SiliconFlow, vLLM, +# Ollama, etc. To use one of those, keep LLM_PROVIDER=openai and just point +# OPENAI_LLM_BASE_URL (below) at that vendor's OpenAI-compatible endpoint +# and set LLM_MODEL to its model name. +# Other (native) options: anthropic, qwen, siliconflow, deepseek, ollama, +# vllm — only needed for vendor features the plain OpenAI +# protocol can't express (e.g. Anthropic's native API). +# +# LLM_API_KEY — credential for the chosen provider / endpoint. 
+# Ignored for local providers (`ollama`, `vllm`). Where to get one: +# - OpenAI: https://platform.openai.com/api-keys +# - Qwen (Aliyun DashScope): https://dashscope.console.aliyun.com/ +# - SiliconFlow: https://siliconflow.cn/ +# - DeepSeek: https://platform.deepseek.com/ +# - Anthropic: https://console.anthropic.com/ +# +# LLM_MODEL — the specific model to call on that endpoint. +# Recommended: gpt-4o-mini (cheap, fast, solid extraction quality) +# Other OpenAI models: gpt-4o, gpt-4.1, gpt-4-turbo +# Via OPENAI_LLM_BASE_URL: use the target endpoint's own model name, e.g. +# qwen-plus / qwen-max (Qwen compatible-mode), +# deepseek-chat (DeepSeek), or any local model +# you've pulled (llama3.1:8b, qwen2.5:7b) +# +# OPENAI_LLM_BASE_URL — endpoint the `openai` provider calls. +# Default (real OpenAI): https://api.openai.com/v1 +# Set this when you want an OpenAI-COMPATIBLE model that is NOT OpenAI itself. +# Examples: +# - Qwen (DashScope compatible-mode, incl. coding plan): +# https://dashscope.aliyuncs.com/compatible-mode/v1 +# (intl: https://dashscope-intl.aliyuncs.com/compatible-mode/v1) +# - DeepSeek: https://api.deepseek.com/v1 +# - SiliconFlow: https://api.siliconflow.cn/v1 +# - Local vLLM: http://localhost:8000/v1 +# - Local Ollama: http://localhost:11434/v1 +# +# Need other providers' base URLs (QWEN_LLM_BASE_URL, OLLAMA_LLM_BASE_URL, +# VLLM_LLM_BASE_URL, ...)? They live in `.env.example.full` under the LLM +# section — copy just the line you need into this file. +LLM_PROVIDER=openai LLM_API_KEY=your_api_key_here -# Adjust the model according to your provider, gpt-4 advised to use when provider is openai -LLM_MODEL=qwen-plus - -## Keep the default settings, as modifications are generally not needed. 
-LLM_TEMPERATURE=0.7 -LLM_MAX_TOKENS=1000 -LLM_TOP_P=0.8 -LLM_TOP_K=50 -# Only supported by qwen provider -LLM_ENABLE_SEARCH=false - -# Default Base URLs for LLM providers, you can adjust if necessary -QWEN_LLM_BASE_URL=https://dashscope.aliyuncs.com/api/v1 +LLM_MODEL=gpt-4o-mini OPENAI_LLM_BASE_URL=https://api.openai.com/v1 +QWEN_LLM_BASE_URL=https://dashscope.aliyuncs.com/api/v1 SILICONFLOW_LLM_BASE_URL=https://api.siliconflow.cn/v1 OLLAMA_LLM_BASE_URL= VLLM_LLM_BASE_URL= ANTHROPIC_LLM_BASE_URL=https://api.anthropic.com DEEPSEEK_LLM_BASE_URL=https://api.deepseek.com -# ============================================================================= -# 3. Embedding Configuration (Required) -# ============================================================================= -# Choose your embedding provider: qwen, openai, siliconflow, huggingface, lmstudio, ollama -EMBEDDING_PROVIDER=qwen - -EMBEDDING_API_KEY=your_api_key_here -# Adjust the model according to your provider, text-embedding-ada-002 advised to use when provider is openai -EMBEDDING_MODEL=text-embedding-v4 -EMBEDDING_DIMS=1536 - -# Default Base URLs for embedding providers, you can adjust if necessary -QWEN_EMBEDDING_BASE_URL=https://dashscope.aliyuncs.com/api/v1 -OPENAI_EMBEDDING_BASE_URL=https://api.openai.com/v1 -# Set false for OpenAI-compatible APIs that reject output-dimension overrides (e.g. Qwen3-Embedding-8B) -# EMBEDDING_OPENAI_PASS_DIMENSIONS=false -SILICONFLOW_EMBEDDING_BASE_URL=https://api.siliconflow.cn/v1 -HUGGINFACE_EMBEDDING_BASE_URL= -LMSTUDIO_EMBEDDING_BASE_URL= -OLLAMA_EMBEDDING_BASE_URL= - -# ============================================================================= -# 4. 
Rerank Configuration (Optional) -# ============================================================================= -# Rerank configuration for reordering search results -RERANKER_ENABLED=false -RERANKER_PROVIDER=qwen -RERANKER_MODEL=qwen3-rerank -RERANKER_API_KEY=your_api_key_here -# RERANKER_API_BASE_URL= - -# Provider-specific configurations -# For Qwen: Uses DASHSCOPE_API_KEY and DASHSCOPE_BASE_URL if RERANKER_* not set -# For Jina: Uses JINA_API_KEY and JINA_API_BASE_URL if RERANKER_* not set -# For Zhipu AI: Uses ZAI_API_KEY and ZAI_API_BASE_URL if RERANKER_* not set - -# ============================================================================= -# 5. Agent Configuration (Optional) -# ============================================================================= -# Agent memory management settings -AGENT_ENABLED=true -AGENT_DEFAULT_SCOPE=AGENT -AGENT_DEFAULT_PRIVACY_LEVEL=PRIVATE -AGENT_DEFAULT_COLLABORATION_LEVEL=READ_ONLY -AGENT_DEFAULT_ACCESS_PERMISSION=OWNER_ONLY - -# Agent Memory Mode (auto, multi_agent, multi_user, hybrid) -AGENT_MEMORY_MODE=auto - - -# ============================================================================= -# 6. Intelligent Memory Configuration (Optional) -# ============================================================================= -# Ebbinghaus forgetting curve settings -INTELLIGENT_MEMORY_ENABLED=true -INTELLIGENT_MEMORY_INITIAL_RETENTION=1.0 -INTELLIGENT_MEMORY_DECAY_RATE=0.1 -INTELLIGENT_MEMORY_REINFORCEMENT_FACTOR=0.3 -INTELLIGENT_MEMORY_WORKING_THRESHOLD=0.3 -INTELLIGENT_MEMORY_SHORT_TERM_THRESHOLD=0.6 -INTELLIGENT_MEMORY_LONG_TERM_THRESHOLD=0.8 - -# Memory decay calculation settings -MEMORY_DECAY_ENABLED=true -MEMORY_DECAY_ALGORITHM=ebbinghaus -MEMORY_DECAY_BASE_RETENTION=1.0 -MEMORY_DECAY_FORGETTING_RATE=0.1 -MEMORY_DECAY_REINFORCEMENT_FACTOR=0.3 - -INTELLIGENT_MEMORY_FALLBACK_TO_SIMPLE_ADD=false - - -# ============================================================================= -# 7. 
Performance Configuration (Optional) -# ============================================================================= -# Memory management settings -MEMORY_BATCH_SIZE=100 -MEMORY_CACHE_SIZE=1000 -MEMORY_CACHE_TTL=3600 -MEMORY_SEARCH_LIMIT=10 -MEMORY_SEARCH_THRESHOLD=0.7 - -# Vector store settings -VECTOR_STORE_BATCH_SIZE=50 -VECTOR_STORE_CACHE_SIZE=500 -VECTOR_STORE_INDEX_REBUILD_INTERVAL=86400 - - -# ============================================================================= -# 8. Security Configuration (Optional) -# ============================================================================= -# Encryption settings -ENCRYPTION_ENABLED=false -ENCRYPTION_KEY= -ENCRYPTION_ALGORITHM=AES-256-GCM - -# Access control settings -ACCESS_CONTROL_ENABLED=true -ACCESS_CONTROL_DEFAULT_PERMISSION=READ_ONLY -ACCESS_CONTROL_ADMIN_USERS=admin,root - - -# ============================================================================= -# 9. Telemetry Configuration (Optional) -# ============================================================================= -# Usage analytics and monitoring -TELEMETRY_ENABLED=false -TELEMETRY_ENDPOINT=https://telemetry.powermem.ai -TELEMETRY_API_KEY= -TELEMETRY_BATCH_SIZE=100 -TELEMETRY_FLUSH_INTERVAL=30 -TELEMETRY_RETENTION_DAYS=30 - - -# ============================================================================= -# 10. Audit Configuration (Optional) -# ============================================================================= -# Audit logging settings -AUDIT_ENABLED=true -AUDIT_LOG_FILE=./logs/audit.log -AUDIT_LOG_LEVEL=INFO -AUDIT_RETENTION_DAYS=90 -AUDIT_COMPRESS_LOGS=true -AUDIT_LOG_ROTATION_SIZE=100MB - - -# ============================================================================= -# 11. 
Logging Configuration (Optional) -# ============================================================================= -# General logging settings -LOGGING_LEVEL=DEBUG -LOGGING_FORMAT=%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] [%(user_id)s] [%(agent_id)s] - %(message)s -LOGGING_FILE=./logs/powermem.log -LOGGING_MAX_SIZE=100MB -LOGGING_BACKUP_COUNT=5 -LOGGING_COMPRESS_BACKUPS=true - -# Console logging -LOGGING_CONSOLE_ENABLED=true -LOGGING_CONSOLE_LEVEL=INFO -LOGGING_CONSOLE_FORMAT=%(levelname)s - %(message)s - - -# ============================================================================= -# 12. Skill Store Configuration (Optional) -# ============================================================================= -# Enable skill storage (structured step-by-step procedures) -# Requires OceanBase with vector + fulltext support -SKILL_STORE_ENABLED=false -# Custom table name (default: {collection}_skills) -# SKILL_STORE_COLLECTION_NAME= -# Similarity threshold for dedup (0.0-1.0) -SKILL_STORE_SIMILARITY_THRESHOLD=0.75 - -# ============================================================================= -# 13. 
Graph Store Configuration (Optional) -# ============================================================================= -# Graph store for knowledge graph storage and retrieval -# Enable graph store functionality -GRAPH_STORE_ENABLED=false - -# Graph store provider (currently supports: oceanbase) -GRAPH_STORE_PROVIDER=oceanbase - -# OceanBase Graph Configuration -GRAPH_STORE_HOST=127.0.0.1 -GRAPH_STORE_PORT=2881 -GRAPH_STORE_USER=root@sys -GRAPH_STORE_PASSWORD=your_password -GRAPH_STORE_DB_NAME=powermem - -# Optional: Graph traversal settings -GRAPH_STORE_MAX_HOPS=3 - -# Optional: Graph store vector and index settings -# GRAPH_STORE_VECTOR_METRIC_TYPE=l2 -# GRAPH_STORE_INDEX_TYPE=HNSW - -# Optional: Custom prompts for graph operations -# GRAPH_STORE_CUSTOM_PROMPT= -# GRAPH_STORE_CUSTOM_EXTRACT_RELATIONS_PROMPT= -# GRAPH_STORE_CUSTOM_UPDATE_GRAPH_PROMPT= -# GRAPH_STORE_CUSTOM_DELETE_RELATIONS_PROMPT= - -# ============================================================================= -# 14. Sparse Embedding Configuration (Optional) -# ============================================================================= -# Choose your sparse embedding provider: qwen, openai -SPARSE_VECTOR_ENABLE=false -SPARSE_EMBEDDER_PROVIDER=qwen - -SPARSE_EMBEDDER_API_KEY=your_api_key_here -SPARSE_EMBEDDER_MODEL=text-embedding-v4 -SPARSE_EMBEDDING_BASE_URL=https://dashscope.aliyuncs.com/api/v1 - -# ============================================================================= -# 15. Query Rewrite Configuration (Optional) -# ============================================================================= -# Custom query rewritten prompt & he model used, keeping it from the same manufacturer as llm - -QUERY_REWRITE_ENABLED=false -# QUERY_REWRITE_PROMPT= -# QUERY_REWRITE_MODEL_OVERRIDE= - -# ============================================================================= -# 16. 
PowerMem HTTP API Server Configuration -# ============================================================================= -# Configuration for the PowerMem HTTP API Server -# ============================================================================= - -# ----------------------------------------------------------------------------- -# Server Settings -# ----------------------------------------------------------------------------- -# Server host address (0.0.0.0 to listen on all interfaces) -POWERMEM_SERVER_HOST=0.0.0.0 - -# Server port number -POWERMEM_SERVER_PORT=8000 - -# Number of worker processes (only used when reload=false) -POWERMEM_SERVER_WORKERS=4 - -# Enable auto-reload for development (true/false) -POWERMEM_SERVER_RELOAD=false - -# ----------------------------------------------------------------------------- -# Authentication Settings -# ----------------------------------------------------------------------------- -# Enable API key authentication (true/false) -POWERMEM_SERVER_AUTH_ENABLED=false - -# API keys (comma-separated list) -# Example: POWERMEM_SERVER_API_KEYS=key1,key2,key3 -POWERMEM_SERVER_API_KEYS= - -# ----------------------------------------------------------------------------- -# Rate Limiting Settings -# ----------------------------------------------------------------------------- -# Enable rate limiting (true/false) -POWERMEM_SERVER_RATE_LIMIT_ENABLED=true - -# Rate limit per minute per IP address -POWERMEM_SERVER_RATE_LIMIT_PER_MINUTE=100 - -# ----------------------------------------------------------------------------- -# Logging Settings -# ----------------------------------------------------------------------------- -POWERMEM_SERVER_LOG_FILE=server.log - -# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL -POWERMEM_SERVER_LOG_LEVEL=INFO - -# Log format: json or text -POWERMEM_SERVER_LOG_FORMAT=text - -# ----------------------------------------------------------------------------- -# API Settings -# 
----------------------------------------------------------------------------- -# API title (shown in Swagger UI) -POWERMEM_SERVER_API_TITLE=PowerMem API - -# API version -POWERMEM_SERVER_API_VERSION=v1 - -# API description (shown in Swagger UI) -POWERMEM_SERVER_API_DESCRIPTION=PowerMem HTTP API Server - Intelligent Memory System - -# ----------------------------------------------------------------------------- -# CORS Settings -# ----------------------------------------------------------------------------- -# Enable CORS (true/false) -POWERMEM_SERVER_CORS_ENABLED=true - -# CORS allowed origins (comma-separated, use * for all origins) -# Example: POWERMEM_SERVER_CORS_ORIGINS=http://localhost:3000,https://example.com -POWERMEM_SERVER_CORS_ORIGINS=* - -# ============================================================================= -# 17. Custom Prompts Configuration (Optional) -# ============================================================================= -# Override the built-in prompts used in the memory extraction pipeline. -# Leave commented out to use the default prompts. - -# Custom prompt for fact extraction (replaces the default FACT_RETRIEVAL_PROMPT) -# POWERMEM_CUSTOM_FACT_EXTRACTION_PROMPT= - -# Custom prompt for memory update decisions (replaces the default UPDATE_MEMORY_PROMPT) -# POWERMEM_CUSTOM_UPDATE_MEMORY_PROMPT= -# Custom prompt for importance evaluation (replaces the default importance scoring prompt) -# POWERMEM_CUSTOM_IMPORTANCE_EVALUATION_PROMPT= \ No newline at end of file diff --git a/.env.example.full b/.env.example.full new file mode 100644 index 00000000..3d5f1e01 --- /dev/null +++ b/.env.example.full @@ -0,0 +1,787 @@ +# ============================================================================= +# PowerMem — full configuration reference +# ============================================================================= +# This file documents every available configuration knob, grouped by component. 
+# Use it when you want to: +# - swap providers (Qwen ↔ OpenAI ↔ SiliconFlow ↔ Ollama ↔ ...), +# - tune database / vector store / graph store performance, +# - enable optional subsystems (reranker, intelligent memory, telemetry, +# audit, skill store, etc.). +# +# If you just want to get started, copy `.env.example` (minimal) instead: +# cp .env.example .env # ~5 vars, zero-config local embedder +# +# Copy this file to `.env` only when you need more control: +# cp .env.example.full .env +# +# Notes on defaults: +# - Database: DATABASE_PROVIDER=oceanbase with OCEANBASE_HOST left empty, +# which boots embedded seekdb on disk (same engine, no separate server). +# Set OCEANBASE_HOST to point at a real OceanBase cluster instead. +# - Embedder: built-in `all-MiniLM-L6-v2` (384 dims) running locally with no +# API key. Setting EMBEDDING_PROVIDER below switches to a cloud / self- +# hosted embedder instead. +# ============================================================================= + +# TIMEZONE — timezone for stored memory timestamps (created_at, updated_at, ...). +# Leave unset (default) to auto-detect from the host machine; PowerMem falls +# back to UTC if detection fails. Uncomment to pin a specific IANA zone. +# Full list: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones +# TIMEZONE=Asia/Shanghai + +# ============================================================================= +# 1. Database (Required) — where memories, vectors, and (optionally) the graph +# live. PowerMem speaks SQL underneath, so any supported backend can hold the +# full memory pipeline. +# ============================================================================= +# +# DATABASE_PROVIDER — which storage engine PowerMem talks to. 
+# Recommended: oceanbase (the OceanBase backend covers both deployment +# shapes: leave OCEANBASE_HOST empty for embedded +# seekdb on disk — zero ops, no separate server — +# or set OCEANBASE_HOST to point at a remote +# OceanBase cluster) +# Other options: sqlite (smallest footprint, dev-only), +# postgres / pgvector (if your stack already runs PostgreSQL) +DATABASE_PROVIDER=oceanbase + +# ----------------------------------------------------------------------------- +# OceanBase — used when DATABASE_PROVIDER=oceanbase. The same provider covers +# two deployment shapes: +# - Embedded seekdb (recommended for zero-config): leave OCEANBASE_HOST +# empty; data lives on disk under OCEANBASE_PATH, no separate database +# process is required. +# - Remote cluster: set OCEANBASE_HOST to your cluster's address. +# ----------------------------------------------------------------------------- +# OCEANBASE_HOST — hostname / IP of the cluster. +# Recommended: empty (boots embedded seekdb at OCEANBASE_PATH) +# Other options: your cluster's hostname / IP for a remote OceanBase setup +OCEANBASE_HOST= +# OCEANBASE_PATH — on-disk directory used by embedded seekdb (i.e. when +# OCEANBASE_HOST is empty). Ignored for remote clusters. +# Recommended: ./seekdb_data +# Other options: any writable absolute path, e.g. /var/lib/powermem/seekdb +OCEANBASE_PATH=./seekdb_data +# OCEANBASE_PORT — SQL listen port on the cluster (used in remote mode). +# Recommended: 2881 (the standard OceanBase port) +OCEANBASE_PORT=2881 +# OCEANBASE_USER / OCEANBASE_PASSWORD — credentials (used in remote mode). +# Use a dedicated PowerMem user with write access to the target database. +OCEANBASE_USER=root@sys +OCEANBASE_PASSWORD=your_password +# OCEANBASE_DATABASE — logical database name (also used in embedded mode). +# Recommended: powermem +OCEANBASE_DATABASE=powermem +# OCEANBASE_COLLECTION — table that stores the main memory rows. 
+# Recommended: memories +OCEANBASE_COLLECTION=memories + +## Keep the default settings, as modifications are generally not needed. +# OCEANBASE_INDEX_TYPE — algorithm used to index the dense vector column. +# Recommended: HNSW (best recall on small/medium datasets, low latency) +# Other options: IVF (large corpora, 10k+ vectors), +# FLAT (brute force; correct but slow above ~10k rows) +OCEANBASE_INDEX_TYPE=HNSW +# OCEANBASE_VECTOR_METRIC_TYPE — distance function used at query time. +# Recommended: cosine (matches most sentence-embedding models) +# Other options: l2 (Euclidean), inner_product (fast, normalized vectors) +OCEANBASE_VECTOR_METRIC_TYPE=cosine +# OCEANBASE_EMBEDDING_MODEL_DIMS — vector dimension; must match the embedder. +# Recommended: 384 (built-in default embedder = all-MiniLM-L6-v2) +# Other options: 768 / 1024 / 1536 / 3072 — whatever your embedder emits +OCEANBASE_EMBEDDING_MODEL_DIMS=384 +# OCEANBASE_TEXT_FIELD / VECTOR_FIELD / PRIMARY_FIELD / METADATA_FIELD / +# VIDX_NAME — column and index names PowerMem reads / writes. Change only +# if you integrate with an existing schema. +OCEANBASE_TEXT_FIELD=document +OCEANBASE_VECTOR_FIELD=embedding +OCEANBASE_PRIMARY_FIELD=id +OCEANBASE_METADATA_FIELD=metadata +OCEANBASE_VIDX_NAME=memories_vidx + +# --- Connection pool (remote mode only) -------------------------------------- +# Read only when OCEANBASE_HOST is non-empty. Embedded seekdb uses a NullPool +# and ignores these. +# OCEANBASE_POOL_RECYCLE — seconds before a pooled connection is recycled. +# Recommended: 3600 (one hour; matches most cloud LB idle timeouts) +# Other options: 1800 (aggressive), 7200+ (long-lived NAT) +OCEANBASE_POOL_RECYCLE=3600 +# OCEANBASE_POOL_PRE_PING — run a `SELECT 1` before each connection checkout +# to detect already-dead connections instead of failing the first query. 
+# Recommended: true (cheap insurance against pool stalls) +# Other options: false (skip the round-trip; only with bulletproof networking) +OCEANBASE_POOL_PRE_PING=true + +# --- Hybrid / sparse retrieval ----------------------------------------------- +# OCEANBASE_INCLUDE_SPARSE — provision a sparse vector column alongside the +# dense one so retrieval can fuse vector + BM25-style sparse matches. The +# generic SPARSE_VECTOR_ENABLE alias (section 14) also resolves here. +# Recommended: false (start dense-only; turn on when you need exact- +# keyword recall — IDs, error codes, etc.) +# Other options: true (requires embedded seekdb ≥1.3 or OceanBase ≥4.5; +# also configure section 14 for the sparse embedder) +OCEANBASE_INCLUDE_SPARSE=false +# OCEANBASE_ENABLE_NATIVE_HYBRID — push the hybrid (dense + full-text + +# sparse) ranking down into a single SQL call using the engine's native +# hybrid extension instead of fusing scores in Python. +# Recommended: false (safe default for older OceanBase clusters) +# Other options: true (requires embedded seekdb ≥1.3 or OceanBase ≥4.5; +# lower latency and less network chatter at scale) +OCEANBASE_ENABLE_NATIVE_HYBRID=false + +# ----------------------------------------------------------------------------- +# SQLite — lightweight, dependency-free, single-file storage for development, +# CI, and tiny deployments. No vector index; relies on a Python-side scan. +# Use only when the OceanBase embedded-seekdb mode is not an option. +# ----------------------------------------------------------------------------- +# SQLITE_PATH — file the database is written to (parent dirs auto-created). +# Recommended: ./data/powermem_dev.db +# Other options: :memory: (ephemeral, for tests only) or any writable path +SQLITE_PATH=./data/powermem_dev.db +# SQLITE_ENABLE_WAL — Write-Ahead Logging for better concurrent read perf. 
+# Recommended: true (safer with concurrent readers; tiny disk overhead) +# Other options: false (only set this if you need rollback-journal mode) +SQLITE_ENABLE_WAL=true +# SQLITE_TIMEOUT — seconds to wait for a database lock before raising. +# Recommended: 30 (tolerates short write contention) +# Other options: any positive number; raise for write-heavy bursts +SQLITE_TIMEOUT=30 +SQLITE_COLLECTION=memories + +# ----------------------------------------------------------------------------- +# PostgreSQL (via pgvector) — use when your environment already runs Postgres +# and you want memories in the same operational store. +# ----------------------------------------------------------------------------- +POSTGRES_HOST=127.0.0.1 +# POSTGRES_PORT — standard Postgres listen port. +# Recommended: 5432 +POSTGRES_PORT=5432 +POSTGRES_USER=postgres +POSTGRES_PASSWORD=your_password +POSTGRES_DATABASE=powermem +POSTGRES_COLLECTION=memories + +## Keep the default settings, as modifications are generally not needed. +POSTGRES_EMBEDDING_MODEL_DIMS=1536 +# POSTGRES_DISKANN / POSTGRES_HNSW — which vector index extensions are +# available and should be used. Enable whichever your Postgres instance has. +# Recommended (single index): HNSW=true, DISKANN=false (works on plain +# pgvector ≥0.5) +# Other options: DISKANN=true (requires the diskann extension, +# better for very large corpora) +POSTGRES_DISKANN=true +POSTGRES_HNSW=true +# DATABASE_SSLMODE / POOL_SIZE / MAX_OVERFLOW — uncomment to tighten the +# connection pool or force TLS. Defaults are safe for development. +# DATABASE_SSLMODE=prefer +# DATABASE_POOL_SIZE=10 +# DATABASE_MAX_OVERFLOW=20 + + +# ============================================================================= +# 2. LLM (Required) — the chat model PowerMem calls to extract facts from raw +# input, decide what to add/update/skip in memory, and synthesize search-time +# answers. The quality and cost of every memory and every retrieval flow +# through this model. 
+# ============================================================================= +# +# LLM_PROVIDER — which gateway to call. +# Recommended: openai +# The `openai` provider speaks the OpenAI chat-completions protocol — the +# de-facto standard. ANY OpenAI-compatible model can run through it (Qwen +# incl. the "coding plan", DeepSeek, SiliconFlow, vLLM, Ollama, ...): keep +# LLM_PROVIDER=openai and point OPENAI_LLM_BASE_URL (below) at that vendor's +# OpenAI-compatible endpoint, then set LLM_MODEL to its model name. +# Other (native) options: anthropic, qwen, siliconflow, deepseek, +# ollama / vllm — only needed for vendor-specific features the +# plain OpenAI protocol can't express (e.g. Anthropic's API). +LLM_PROVIDER=openai + +# LLM_API_KEY — credential for the chosen provider. Ignored for local +# providers (`ollama`, `vllm`). See `.env.example` for the per-provider +# console URL where you get a key. +LLM_API_KEY=your_api_key_here + +# LLM_MODEL — the specific model on the chosen endpoint. +# Recommended: gpt-4o-mini (cheap, fast, solid extraction quality) +# Other OpenAI models: gpt-4o, gpt-4.1, gpt-4-turbo +# Via OPENAI_LLM_BASE_URL: the target endpoint's own model name, e.g. +# qwen-plus / qwen-max (Qwen compatible-mode), +# deepseek-chat (DeepSeek) +# For `anthropic`: claude-sonnet-4-6, claude-haiku-4-5 +# For `ollama` / `vllm`: any model you've pulled locally +# (e.g. llama3.1:8b, qwen2.5:7b) +LLM_MODEL=gpt-4o-mini + +## Keep the default settings, as modifications are generally not needed. +# LLM_TEMPERATURE — sampling temperature; higher = more creative, lower = more +# deterministic. Memory extraction prefers deterministic output. +# Recommended: 0.7 (works well for extraction + Q&A) +# Other options: 0.0–0.3 for strict extraction; up to 1.0 for free-form Q&A +LLM_TEMPERATURE=0.7 +# LLM_MAX_TOKENS — cap on generated tokens per call. Raise it if extraction or +# answers get truncated; lower it to bound cost. 
+# Recommended: 1000 +# Other options: 512 (cheap, short answers); 2000+ (long answers / large +# fact extraction batches) +LLM_MAX_TOKENS=1000 +# LLM_TOP_P / LLM_TOP_K — nucleus / top-k sampling. Keep defaults unless you +# specifically need different randomness behaviour. +LLM_TOP_P=0.8 +LLM_TOP_K=50 +# LLM_ENABLE_SEARCH — Qwen-only: lets the model consult web search inside its +# own call. Costs extra tokens; only useful when memories need fresh facts. +# Recommended: false (PowerMem already has its own retrieval pipeline) +# Other options: true (only if you specifically want LLM-side web augmentation) +LLM_ENABLE_SEARCH=false + +# Per-provider base URLs. +# +# OPENAI_LLM_BASE_URL is the endpoint the recommended `openai` provider calls. +# Leave it on the official OpenAI URL for OpenAI itself; repoint it to run any +# OpenAI-COMPATIBLE model through the same provider (this is the recommended way +# to use Qwen, DeepSeek, SiliconFlow, vLLM, Ollama, ... — no provider switch +# needed, just the base URL + a matching LLM_MODEL): +# - OpenAI (default): https://api.openai.com/v1 +# - Qwen DashScope compatible-mode +# (incl. coding plan): https://dashscope.aliyuncs.com/compatible-mode/v1 +# (international): https://dashscope-intl.aliyuncs.com/compatible-mode/v1 +# - DeepSeek: https://api.deepseek.com/v1 +# - SiliconFlow: https://api.siliconflow.cn/v1 +# - Local vLLM / Ollama: http://localhost:8000/v1 / http://localhost:11434/v1 +OPENAI_LLM_BASE_URL=https://api.openai.com/v1 +# +# The remaining URLs are only read when you pick that *native* provider instead +# of routing through `openai` (e.g. LLM_PROVIDER=qwen). Override only when +# fronting the provider with a proxy / self-hosted gateway, or for +# `ollama` / `vllm` where you must point at your local instance. 
+QWEN_LLM_BASE_URL=https://dashscope.aliyuncs.com/api/v1 +SILICONFLOW_LLM_BASE_URL=https://api.siliconflow.cn/v1 +OLLAMA_LLM_BASE_URL= +VLLM_LLM_BASE_URL= +ANTHROPIC_LLM_BASE_URL=https://api.anthropic.com +DEEPSEEK_LLM_BASE_URL=https://api.deepseek.com + +# ============================================================================= +# 3. Embedding (Optional) — the model that turns text into the dense vectors +# stored in the vector store. Higher-quality embeddings = better retrieval +# recall. The block below is wired to the zero-config built-in default; flip +# EMBEDDING_PROVIDER (and the model / dims to match) to use a cloud or +# self-hosted embedder instead. +# ============================================================================= +# +# EMBEDDING_PROVIDER — which embedding service PowerMem calls. +# Recommended: default (the built-in local `all-MiniLM-L6-v2`; no API +# key, model auto-downloads to ~/.cache on first +# use — zero-config, runs entirely locally) +# Other options: qwen (cloud; higher recall, needs an API key), +# openai, siliconflow, +# huggingface (local sentence-transformers / TEI server), +# ollama / lmstudio (fully local via a separate runtime) +EMBEDDING_PROVIDER=default + +# EMBEDDING_API_KEY — credential for the chosen provider. Not used by +# `default` (and ignored by `huggingface` direct mode, `ollama`, `lmstudio`). +# Uncomment and fill in when you switch EMBEDDING_PROVIDER to a cloud +# service. +# EMBEDDING_API_KEY=your_api_key_here +# EMBEDDING_MODEL — specific embedding model on the chosen provider. 
+# Recommended: all-MiniLM-L6-v2 (the only model the built-in `default` +# provider supports — match it here for +# clarity / future-proofing) +# Other options for `qwen`: text-embedding-v4 (current), +# text-embedding-v3 (older, cheaper) +# For `openai`: text-embedding-3-large (best, 3072d), +# text-embedding-3-small (cheaper, 1536d), +# text-embedding-ada-002 (legacy) +# For `huggingface`: sentence-transformers/all-MiniLM-L6-v2, +# bge-m3, etc. +EMBEDDING_MODEL=all-MiniLM-L6-v2 +# EMBEDDING_DIMS — output vector dimension. MUST match the model above AND +# the OCEANBASE_EMBEDDING_MODEL_DIMS in your storage section. +# Recommended: 384 (matches the built-in `default` provider) +# Other options: 768 (bge-base), 1024 (bge-large), +# 1536 (text-embedding-v4 / text-embedding-3-small), +# 3072 (text-embedding-3-large) +EMBEDDING_DIMS=384 + +# Per-provider base URLs. Override only when you front the provider with a +# proxy / self-hosted gateway (e.g. http://localhost:8080/v1 for a local TEI +# server, or http://localhost:11434/v1 for Ollama). +QWEN_EMBEDDING_BASE_URL=https://dashscope.aliyuncs.com/api/v1 +OPENAI_EMBEDDING_BASE_URL=https://api.openai.com/v1 +# EMBEDDING_OPENAI_PASS_DIMENSIONS — set to `false` for OpenAI-compatible +# gateways that reject Matryoshka / output-dimension overrides +# (e.g. Qwen3-Embedding-8B served via an OpenAI-compatible endpoint). +# EMBEDDING_OPENAI_PASS_DIMENSIONS=false +SILICONFLOW_EMBEDDING_BASE_URL=https://api.siliconflow.cn/v1 +HUGGINFACE_EMBEDDING_BASE_URL= +LMSTUDIO_EMBEDDING_BASE_URL= +OLLAMA_EMBEDDING_BASE_URL= + +# ============================================================================= +# 4. Rerank (Optional) — a second-stage model that re-scores the top vector +# hits with a cross-encoder. Cheap recall first (vector + full-text), then +# accurate ordering with the reranker. Improves search precision a lot for +# Q&A use cases at the cost of one extra model call per query. 
+# ============================================================================= +# +# RERANKER_ENABLED — turn the rerank stage on/off. +# Recommended: false (start simple; enable once recall is good and you +# want sharper top-k ordering) +# Other options: true (enable for Q&A / agent loops where the top result +# matters more than coverage) +RERANKER_ENABLED=false +# RERANKER_PROVIDER — which rerank service to call when enabled. +# Recommended: qwen (qwen3-rerank; strong CN/EN performance) +# Other options: jina (jina-reranker family), zai (Zhipu AI rerank) +RERANKER_PROVIDER=qwen +# RERANKER_MODEL — specific rerank model. +# Recommended: qwen3-rerank (matches the Qwen provider above) +# Other options for `jina`: jina-reranker-v2-base-multilingual +# For `zai`: rerank-3-base, rerank-3-large +RERANKER_MODEL=qwen3-rerank +RERANKER_API_KEY=your_api_key_here +# RERANKER_API_BASE_URL — override only when fronting the provider with a +# proxy or self-hosted gateway. +# RERANKER_API_BASE_URL= + +# Each provider also accepts its native keys / URLs if RERANKER_* is unset: +# Qwen: DASHSCOPE_API_KEY + DASHSCOPE_BASE_URL +# Jina: JINA_API_KEY + JINA_API_BASE_URL +# Zhipu AI: ZAI_API_KEY + ZAI_API_BASE_URL + +# ============================================================================= +# 5. Agent (Optional) — controls how memories are scoped, shared, and +# protected across multiple agents / users in the same database. Most +# single-user, single-app deployments can leave this section at the defaults. +# ============================================================================= +# +# AGENT_ENABLED — turn agent-aware memory routing on/off. +# Recommended: true (safe even for single-agent apps; the cost is zero) +# Other options: false (only if you intentionally want a flat, unscoped +# memory pool) +AGENT_ENABLED=true +# AGENT_DEFAULT_SCOPE — default visibility scope of a new memory. 
+# Recommended: AGENT (memories belong to the agent that wrote them) +# Other options: USER (cross-agent, per user), +# GLOBAL (shared across the whole deployment) +AGENT_DEFAULT_SCOPE=AGENT +# AGENT_DEFAULT_PRIVACY_LEVEL — privacy tag stamped on new memories. +# Recommended: PRIVATE (locked to the owner) +# Other options: PUBLIC, RESTRICTED +AGENT_DEFAULT_PRIVACY_LEVEL=PRIVATE +# AGENT_DEFAULT_COLLABORATION_LEVEL — how other agents may interact with +# someone else's memories by default. +# Recommended: READ_ONLY (collaborate without overwriting) +# Other options: NONE (full isolation), READ_WRITE (open collaboration) +AGENT_DEFAULT_COLLABORATION_LEVEL=READ_ONLY +# AGENT_DEFAULT_ACCESS_PERMISSION — default ACL applied to new memories. +# Recommended: OWNER_ONLY +# Other options: TEAM, PUBLIC +AGENT_DEFAULT_ACCESS_PERMISSION=OWNER_ONLY + +# AGENT_MEMORY_MODE — how PowerMem partitions memory across agents/users. +# Recommended: auto (chosen from the agent_id / user_id you pass at +# call time — works for almost everyone) +# Other options: multi_agent (force agent-scoped layout), +# multi_user (force user-scoped layout), +# hybrid (both axes; for shared-team agents) +AGENT_MEMORY_MODE=auto + + +# ============================================================================= +# 6. Intelligent Memory (Optional) — Ebbinghaus-style time decay so memories +# fade unless reinforced by access, and tiered "working / short-term / +# long-term" retention thresholds drive search ranking. Disable only if you +# want a pure append-only memory log. +# ============================================================================= +# +# INTELLIGENT_MEMORY_ENABLED — turn the decay/reinforcement pipeline on. +# Recommended: true +# Other options: false (treat all memories as equally fresh forever) +INTELLIGENT_MEMORY_ENABLED=true +# INTELLIGENT_MEMORY_INITIAL_RETENTION — strength assigned to a memory the +# moment it's first stored (1.0 = fully retained). 
+# Recommended: 1.0 +INTELLIGENT_MEMORY_INITIAL_RETENTION=1.0 +# INTELLIGENT_MEMORY_DECAY_RATE — how quickly retention falls over time +# (higher = faster forgetting). +# Recommended: 0.1 +# Other options: 0.05 (slower fade, larger working set), +# 0.2 (more aggressive forgetting) +INTELLIGENT_MEMORY_DECAY_RATE=0.1 +# INTELLIGENT_MEMORY_REINFORCEMENT_FACTOR — how much retention recovers each +# time a memory is hit / accessed. +# Recommended: 0.3 +# Other options: 0.1–0.5 +INTELLIGENT_MEMORY_REINFORCEMENT_FACTOR=0.3 +# WORKING / SHORT_TERM / LONG_TERM thresholds — retention cutoffs that bucket +# memories into the three tiers (anything below WORKING is effectively +# forgotten unless reinforced). +# Recommended: 0.3 / 0.6 / 0.8 +INTELLIGENT_MEMORY_WORKING_THRESHOLD=0.3 +INTELLIGENT_MEMORY_SHORT_TERM_THRESHOLD=0.6 +INTELLIGENT_MEMORY_LONG_TERM_THRESHOLD=0.8 + +# Memory decay calculation — same idea, applied per-write at the storage +# layer. Keep aligned with INTELLIGENT_MEMORY_* above. +MEMORY_DECAY_ENABLED=true +# MEMORY_DECAY_ALGORITHM — decay function. +# Recommended: ebbinghaus (matches the classical forgetting curve) +# Other options: linear, exponential +MEMORY_DECAY_ALGORITHM=ebbinghaus +MEMORY_DECAY_BASE_RETENTION=1.0 +MEMORY_DECAY_FORGETTING_RATE=0.1 +MEMORY_DECAY_REINFORCEMENT_FACTOR=0.3 + +# INTELLIGENT_MEMORY_FALLBACK_TO_SIMPLE_ADD — when the LLM extraction step +# fails (rate limit / parse error), should PowerMem still persist the raw +# input as a memory? +# Recommended: false (skip noisy fallbacks; surface the error instead) +# Other options: true (best-effort capture; useful for audit / data loss +# prevention scenarios) +INTELLIGENT_MEMORY_FALLBACK_TO_SIMPLE_ADD=false + + +# ============================================================================= +# 7. Performance (Optional) — batch sizes, in-memory caches, and search +# limits. Defaults are tuned for development; raise the batch / cache numbers +# in production once you've measured. 
+# ============================================================================= +# +# MEMORY_BATCH_SIZE — how many memories are processed in a single internal +# batch (extraction, embedding, write). +# Recommended: 100 +# Other options: 50 (slower hardware), 200–500 (large ingest jobs) +MEMORY_BATCH_SIZE=100 +# MEMORY_CACHE_SIZE — number of recently-read memories kept in process. +# Recommended: 1000 +# Other options: 200 (RAM-constrained), 5000+ (read-heavy workloads) +MEMORY_CACHE_SIZE=1000 +# MEMORY_CACHE_TTL — seconds before a cached memory is re-fetched from store. +# Recommended: 3600 (one hour; fresh enough for most apps) +# Other options: 300 (highly-mutable data), 86400 (mostly-static memories) +MEMORY_CACHE_TTL=3600 +# MEMORY_SEARCH_LIMIT — default top-k returned by `memory.search()`. +# Recommended: 10 +# Other options: 3–5 (precision-first Q&A), 20–50 (broad context for agents) +MEMORY_SEARCH_LIMIT=10 +# MEMORY_SEARCH_THRESHOLD — minimum similarity score required to surface a +# hit (range 0.0–1.0, cosine). +# Recommended: 0.7 (drops low-quality matches without being too strict) +# Other options: 0.5 (keep more recall), 0.85 (strict precision) +MEMORY_SEARCH_THRESHOLD=0.7 + +# Vector store batching / caching — same idea, applied at the storage layer. +# VECTOR_STORE_BATCH_SIZE — rows per write batch sent to the backend. +# Recommended: 50 +# Other options: 200–1000 for bulk ingests against OceanBase / pgvector +VECTOR_STORE_BATCH_SIZE=50 +VECTOR_STORE_CACHE_SIZE=500 +# VECTOR_STORE_INDEX_REBUILD_INTERVAL — seconds between optional background +# index optimization passes. +# Recommended: 86400 (once a day; minimal disruption) +# Other options: 3600 (high-churn workloads), 0 (never) +VECTOR_STORE_INDEX_REBUILD_INTERVAL=86400 + + +# ============================================================================= +# 8. Security (Optional) — at-rest encryption of memory text and per-user +# access control. 
Enable when memories may contain PII / regulated data. +# ============================================================================= +# +# ENCRYPTION_ENABLED — encrypt memory payloads before storing them. +# Recommended: false (start without; enable once you have a real KMS) +# Other options: true (then ENCRYPTION_KEY MUST be set to a 32-byte key, +# preferably loaded from a secret manager) +ENCRYPTION_ENABLED=false +ENCRYPTION_KEY= +# ENCRYPTION_ALGORITHM — symmetric cipher used when encryption is on. +# Recommended: AES-256-GCM (authenticated; widely supported) +# Other options: AES-256-CBC (legacy compatibility only) +ENCRYPTION_ALGORITHM=AES-256-GCM + +# ACCESS_CONTROL_ENABLED — turn on ACL enforcement at the API boundary. +# Recommended: true (defence in depth; cheap to keep on) +# Other options: false (single-tenant dev environments) +ACCESS_CONTROL_ENABLED=true +# ACCESS_CONTROL_DEFAULT_PERMISSION — what a brand-new user can do by default. +# Recommended: READ_ONLY +# Other options: READ_WRITE, NONE +ACCESS_CONTROL_DEFAULT_PERMISSION=READ_ONLY +# ACCESS_CONTROL_ADMIN_USERS — comma-separated user ids granted full access. +# Recommended: set to your operator account(s); avoid using `root` in prod +ACCESS_CONTROL_ADMIN_USERS=admin,root + + +# ============================================================================= +# 9. Telemetry (Optional) — anonymized usage analytics streamed to the +# PowerMem hosted endpoint to help guide development. Fully opt-in. +# ============================================================================= +# +# TELEMETRY_ENABLED — turn telemetry on. +# Recommended: false (off by default; only enable if you want to send +# usage data) +# Other options: true (send anonymized events to TELEMETRY_ENDPOINT) +TELEMETRY_ENABLED=false +TELEMETRY_ENDPOINT=https://telemetry.powermem.ai +TELEMETRY_API_KEY= +# TELEMETRY_BATCH_SIZE / FLUSH_INTERVAL / RETENTION_DAYS — buffering and +# server-side retention knobs. 
Defaults are safe; tune only on very high +# event volumes. +TELEMETRY_BATCH_SIZE=100 +TELEMETRY_FLUSH_INTERVAL=30 +TELEMETRY_RETENTION_DAYS=30 + + +# ============================================================================= +# 10. Audit (Optional) — local append-only log of who did what (memory +# add/update/delete). Compliance-oriented; cheap to keep on. +# ============================================================================= +# +# AUDIT_ENABLED — write an audit record per state-changing call. +# Recommended: true (always-on is recommended for any shared deployment) +# Other options: false (single-developer use; nothing to audit) +AUDIT_ENABLED=true +AUDIT_LOG_FILE=./logs/audit.log +# AUDIT_LOG_LEVEL — verbosity of audit lines. +# Recommended: INFO (one line per actionable event) +# Other options: DEBUG (verbose; include request bodies), WARNING (errors only) +AUDIT_LOG_LEVEL=INFO +# AUDIT_RETENTION_DAYS — how long to keep rotated audit logs on disk. +# Recommended: 90 (typical compliance window) +# Other options: 30 (lean), 365+ (regulated industries) +AUDIT_RETENTION_DAYS=90 +AUDIT_COMPRESS_LOGS=true +AUDIT_LOG_ROTATION_SIZE=100MB + + +# ============================================================================= +# 11. Logging (Optional) — where PowerMem writes its application logs and at +# what verbosity. Independent of the audit log above. +# ============================================================================= +# +# LOGGING_LEVEL — minimum severity written to the file sink. +# Recommended: DEBUG (during development / first integration) +# Other options: INFO (production default — much less noise), +# WARNING / ERROR (only for very quiet ops environments) +LOGGING_LEVEL=DEBUG +LOGGING_FORMAT=%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] [%(user_id)s] [%(agent_id)s] - %(message)s +LOGGING_FILE=./logs/powermem.log +# LOGGING_MAX_SIZE / BACKUP_COUNT / COMPRESS_BACKUPS — log rotation. 
+# Recommended: 100MB / 5 files / compressed +# Other options: tune up on high-volume servers (e.g. 500MB, 10 files) +LOGGING_MAX_SIZE=100MB +LOGGING_BACKUP_COUNT=5 +LOGGING_COMPRESS_BACKUPS=true + +# Console (stdout) sink — independent of the file sink above. +LOGGING_CONSOLE_ENABLED=true +# LOGGING_CONSOLE_LEVEL — verbosity at stdout. +# Recommended: INFO (clean console; full detail still goes to the file) +# Other options: DEBUG (mirror everything), WARNING (silent unless something +# is wrong) +LOGGING_CONSOLE_LEVEL=INFO +LOGGING_CONSOLE_FORMAT=%(levelname)s - %(message)s + + +# ============================================================================= +# 12. Skill Store (Optional) — a second, structured store for *skills*: +# reusable step-by-step procedures the agent has learned ("how to refund an +# order", "how to spin up a VM"). Different from raw memory facts. Requires a +# backend with vector + fulltext support (i.e. seekdb or OceanBase). +# ============================================================================= +# +# SKILL_STORE_ENABLED — turn the skill store on. +# Recommended: false (start with plain memory; enable once you have +# repeated multi-step procedures worth distilling) +# Other options: true (then make sure DATABASE_PROVIDER is oceanbase — +# sqlite / postgres are not supported here) +SKILL_STORE_ENABLED=false +# SKILL_STORE_COLLECTION_NAME — custom table name; defaults to +# `{vector store collection}_skills` when left unset. +# SKILL_STORE_COLLECTION_NAME= +# SKILL_STORE_SIMILARITY_THRESHOLD — cosine threshold above which two skills +# are treated as duplicates (the newer one is merged into the older). +# Recommended: 0.75 +# Other options: 0.65 (aggressive merging — fewer, broader skills), +# 0.85 (conservative — keep near-duplicates separate) +SKILL_STORE_SIMILARITY_THRESHOLD=0.75 + +# ============================================================================= +# 13. 
Graph Store (Optional) — knowledge-graph layer for entities and the +# relations between them, in addition to the dense-vector memories above. +# Enables multi-hop retrieval ("everything connected to entity X within 2 +# hops"). Requires the OceanBase provider. +# ============================================================================= +# +# GRAPH_STORE_ENABLED — turn the graph layer on. +# Recommended: false (start with vector + full-text only; enable when +# your queries genuinely need multi-hop traversal) +# Other options: true +GRAPH_STORE_ENABLED=false + +# GRAPH_STORE_PROVIDER — which backend hosts the graph. +# Recommended: oceanbase (the same provider used by the vector store; +# leave GRAPH_STORE_HOST blank for embedded +# seekdb mode, set it for a remote cluster) +GRAPH_STORE_PROVIDER=oceanbase + +# Connection to the graph backend. Defaults below point at an OceanBase +# server on 127.0.0.1:2881; clear GRAPH_STORE_HOST to use embedded seekdb +# mode instead, or set it to a remote cluster's address. +GRAPH_STORE_HOST=127.0.0.1 +GRAPH_STORE_PORT=2881 +GRAPH_STORE_USER=root@sys +GRAPH_STORE_PASSWORD=your_password +GRAPH_STORE_DB_NAME=powermem + +# GRAPH_STORE_MAX_HOPS — maximum traversal depth at query time. +# Recommended: 3 (covers most "X is related to Y via Z" queries) +# Other options: 1–2 (cheaper, narrower), 4–5 (broader; cost grows fast) +GRAPH_STORE_MAX_HOPS=3 + +# Optional: vector / index settings on the graph node embeddings (same +# semantics as the OCEANBASE_* counterparts in section 1). +# GRAPH_STORE_VECTOR_METRIC_TYPE=l2 +# GRAPH_STORE_INDEX_TYPE=HNSW + +# Optional: override the LLM prompts used by the graph extraction / +# update / delete pipelines. Leave commented to use the curated defaults. 
+# GRAPH_STORE_CUSTOM_PROMPT= +# GRAPH_STORE_CUSTOM_EXTRACT_RELATIONS_PROMPT= +# GRAPH_STORE_CUSTOM_UPDATE_GRAPH_PROMPT= +# GRAPH_STORE_CUSTOM_DELETE_RELATIONS_PROMPT= + +# ============================================================================= +# 14. 
Sparse Embedding (Optional) — adds a sparse (BM25-like) vector beside +# the dense one, so retrieval can match exact-keyword queries (names, codes, +# rare tokens) that dense embeddings tend to miss. Requires the OceanBase +# provider (embedded seekdb ≥1.3 or remote OceanBase ≥4.5); sqlite / +# pgvector do not support sparse vectors today. +# ============================================================================= +# +# SPARSE_VECTOR_ENABLE — turn sparse retrieval on. +# Recommended: false (start dense-only; enable when you see queries that +# need exact-keyword recall — IDs, error codes, etc.) +# Other options: true +SPARSE_VECTOR_ENABLE=false +# SPARSE_EMBEDDER_PROVIDER — which API produces the sparse vectors. +# Recommended: qwen (text-embedding-v4 emits a sparse output natively) +# Other options: openai +SPARSE_EMBEDDER_PROVIDER=qwen + +SPARSE_EMBEDDER_API_KEY=your_api_key_here +SPARSE_EMBEDDER_MODEL=text-embedding-v4 +SPARSE_EMBEDDING_BASE_URL=https://dashscope.aliyuncs.com/api/v1 + +# ============================================================================= +# 15. Query Rewrite (Optional) — rewrites the raw user query with an LLM +# before searching, to normalize phrasing / expand abbreviations / add +# synonyms. Improves recall on noisy or terse queries at the cost of one LLM +# call per search. +# ============================================================================= +# +# QUERY_REWRITE_ENABLED — turn rewriting on. +# Recommended: false (start without; enable once you see retrieval +# miss obvious matches due to phrasing) +# Other options: true +QUERY_REWRITE_ENABLED=false +# QUERY_REWRITE_PROMPT — custom rewrite prompt. Leave commented to use the +# built-in default tuned for memory retrieval. +# QUERY_REWRITE_PROMPT= +# QUERY_REWRITE_MODEL_OVERRIDE — model to use for rewriting; defaults to the +# main LLM_MODEL above. Keep aligned with your main LLM provider for the +# cheapest / lowest-latency calls. 
+# QUERY_REWRITE_MODEL_OVERRIDE= + +# ============================================================================= +# 16. HTTP API Server — only used when you run `powermem-server`. These knobs +# do not affect the SDK / CLI flows. Skip the whole section if you only use +# `from powermem import Memory`. +# ============================================================================= + +# --- Bind -------------------------------------------------------------------- +# POWERMEM_SERVER_HOST — interface to bind on. +# Recommended: 0.0.0.0 (listen on all interfaces; needed inside containers) +# Other options: 127.0.0.1 (loopback only; safest for desktop / dev) +POWERMEM_SERVER_HOST=0.0.0.0 +# POWERMEM_SERVER_PORT — TCP port. +# Recommended: 8848 +# Other options: any free port; align with your reverse proxy +POWERMEM_SERVER_PORT=8848 +# POWERMEM_SERVER_WORKERS — uvicorn worker processes (multi-process serving). +# Ignored when RELOAD=true. Pick ~ CPU cores for stateless workloads. +# Recommended: 4 +# Other options: 1 (debug), 8+ (heavy throughput) +POWERMEM_SERVER_WORKERS=4 +# POWERMEM_SERVER_RELOAD — auto-reload on source change. Single-process only. +# Recommended: false (production) +# Other options: true (local development) +POWERMEM_SERVER_RELOAD=false + +# --- Authentication ---------------------------------------------------------- +# POWERMEM_SERVER_AUTH_ENABLED — require an API key on every request. +# Default: false (zero-config local / test; works out of the box with no key) +# Set to: true (any time the server is reachable from the network — then +# also set POWERMEM_SERVER_API_KEYS to a comma-separated list) +POWERMEM_SERVER_AUTH_ENABLED=false +# POWERMEM_SERVER_API_KEYS — comma-separated list of accepted keys. +# Example: POWERMEM_SERVER_API_KEYS=key1,key2,key3 +POWERMEM_SERVER_API_KEYS= + +# --- Rate limiting ----------------------------------------------------------- +# POWERMEM_SERVER_RATE_LIMIT_ENABLED — per-IP throttling. 
+# Recommended: true +# Other options: false (trust your upstream proxy to do this) +POWERMEM_SERVER_RATE_LIMIT_ENABLED=true +# POWERMEM_SERVER_RATE_LIMIT_PER_MINUTE — calls/min/IP before throttling. +# Recommended: 100 +# Other options: 30 (strict), 1000+ (server-to-server use) +POWERMEM_SERVER_RATE_LIMIT_PER_MINUTE=100 + +# --- Server-side logging ----------------------------------------------------- +POWERMEM_SERVER_LOG_FILE=server.log +# POWERMEM_SERVER_LOG_LEVEL — DEBUG / INFO / WARNING / ERROR / CRITICAL. +# Recommended: INFO (production) +# Other options: DEBUG (dev), WARNING (very quiet ops) +POWERMEM_SERVER_LOG_LEVEL=INFO +# POWERMEM_SERVER_LOG_FORMAT — `json` or `text`. +# Recommended: json (machine-parseable; required by most log shippers) +# Other options: text (human-readable in the terminal) +POWERMEM_SERVER_LOG_FORMAT=json + +# --- API metadata (shown in Swagger UI) -------------------------------------- +POWERMEM_SERVER_API_TITLE=PowerMem API +POWERMEM_SERVER_API_VERSION=v1 +POWERMEM_SERVER_API_DESCRIPTION=PowerMem HTTP API Server - Intelligent Memory System + +# --- CORS -------------------------------------------------------------------- +# POWERMEM_SERVER_CORS_ENABLED — needed for browser-based clients. +# Recommended: true (most dashboards / web UIs need this) +# Other options: false (server-to-server only) +POWERMEM_SERVER_CORS_ENABLED=true +# POWERMEM_SERVER_CORS_ORIGINS — comma-separated allowlist. +# Recommended: set to your dashboard origin(s), +# e.g. https://memory.example.com,http://localhost:3000 +# Other options: * (open; only safe inside a trusted private network) +POWERMEM_SERVER_CORS_ORIGINS=* + +# ============================================================================= +# 17. Custom Prompts (Optional) — override the LLM prompts used by the memory +# extraction pipeline. Leave commented to use the built-in defaults, which +# have been tuned against the public benchmarks (LOCOMO, AppWorld). 
+# ============================================================================= +# Tip: usually the right move is to tweak the *user-provided* context you pass +# to `memory.add()`, not these system prompts. Touch these only when you have +# a specific behaviour the defaults cannot express. + +# Replaces the default FACT_RETRIEVAL_PROMPT used to extract facts from raw input. +# POWERMEM_CUSTOM_FACT_EXTRACTION_PROMPT= + +# Replaces the default UPDATE_MEMORY_PROMPT used to decide add / update / skip. +# POWERMEM_CUSTOM_UPDATE_MEMORY_PROMPT= + +# Replaces the default importance scoring prompt (drives retention weighting). +# POWERMEM_CUSTOM_IMPORTANCE_EVALUATION_PROMPT= \ No newline at end of file diff --git a/.gitignore b/.gitignore index 5421756b..7dee521f 100644 --- a/.gitignore +++ b/.gitignore @@ -217,6 +217,7 @@ api_server.log # Server data directories (if any) server_data/ api_data/ +seekdb_data/ # Server backups server_backup/ diff --git a/Makefile b/Makefile index a4e94eae..2ebb0913 100644 --- a/Makefile +++ b/Makefile @@ -216,7 +216,7 @@ ENV_SERVER_WORKERS := $(shell grep -E '^POWERMEM_SERVER_WORKERS=' .env 2>/dev/nu # Use values from .env if they exist and are non-empty, otherwise use defaults SERVER_HOST := $(or $(ENV_SERVER_HOST),0.0.0.0) -SERVER_PORT := $(or $(ENV_SERVER_PORT),8000) +SERVER_PORT := $(or $(ENV_SERVER_PORT),8848) SERVER_WORKERS := $(or $(ENV_SERVER_WORKERS),4) server-start: ## Start the PowerMem API server @@ -401,21 +401,21 @@ docker-run: ## Run Docker container fi docker run -d \ --name powermem-server \ - -p 8000:8000 \ + -p 8848:8848 \ -v $$(pwd)/.env:/app/.env:ro \ --env-file .env \ $(DOCKER_IMAGE):$(DOCKER_TAG) || \ (echo "Container may already exist. 
Use 'make docker-stop' first or 'make docker-restart'"; exit 1) @echo "✓ Container started" - @echo "Server running at http://localhost:8000" - @echo "API docs at http://localhost:8000/docs" + @echo "Server running at http://localhost:8848" + @echo "API docs at http://localhost:8848/docs" docker-up: ## Start services using docker-compose @echo "Starting services with docker-compose..." docker-compose -f $(DOCKER_COMPOSE_FILE) up -d @echo "✓ Services started" - @echo "Server running at http://localhost:8000" - @echo "API docs at http://localhost:8000/docs" + @echo "Server running at http://localhost:8848" + @echo "API docs at http://localhost:8848/docs" docker-down: ## Stop services using docker-compose @echo "Stopping services with docker-compose..." diff --git a/README.md b/README.md index 3bad783c..dc8df9e7 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ PowerMem ships first-party plugins for the most common AI clients. All of them p | Client / framework | One-line install | Mode | |--------------------|------------------|------| | OpenClaw (ClawdBot) | `openclaw plugins install memory-powermem` | CLI (default), HTTP optional | -| Claude Code | `git clone https://github.com/oceanbase/powermem && claude --plugin-dir powermem/apps/claude-code-plugin` | HTTP (default), MCP optional | +| Claude Code | `git clone https://github.com/oceanbase/powermem`, then tell Claude Code: *"Read and follow `apps/claude-code-plugin/SETUP.md`"* ([details](#claude-code)) | HTTP (default), MCP optional | | Cursor / VS Code / Codex / Windsurf / GitHub Copilot | Install the [PowerMem VS Code extension](apps/vscode-extension/) and run **PowerMem: Link to AI tools** | MCP or HTTP, per client | | Claude Desktop / Cline / any MCP client | `uvx powermem-mcp sse` | MCP (SSE / stdio / streamable-http) | | LangChain / LangGraph | `pip install powermem`, see [examples](#examples) | Python SDK | @@ -68,27 +68,26 @@ Defaults to **CLI mode** — the plugin invokes a bundled `pmem` against 
SQLite ### Claude Code -```bash -# From a clone of this repo -claude --plugin-dir /path/to/powermem/apps/claude-code-plugin +#### Fastest path — let Claude Code set itself up + +First download the code and enter the directory: -# Or unpack a packaged release zip and pass --plugin-dir to it -make package-claude-plugin # builds apps/claude-code-plugin/dist/.zip +```bash +git clone https://github.com/oceanbase/powermem +cd powermem ``` -HTTP mode is on by default: +Then open Claude Code in your terminal and paste this one line: -- `UserPromptSubmit` -> `POST /api/v1/memories/search` and the top results are injected as `additionalContext`. -- `SessionEnd` / `PostCompact` -> `POST /api/v1/memories` writes the transcript or compact summary. -- No MCP setup, no Python needed on the user's machine (hooks ship as native binaries under `hooks/bin/`). +```text +Read and follow apps/claude-code-plugin/SETUP.md to set up PowerMem memory for Claude Code. +``` -Switch to MCP mode for in-chat `search_memories` / `add_memory` tools: +Claude Code reads [`apps/claude-code-plugin/SETUP.md`](apps/claude-code-plugin/SETUP.md), asks you for the few required secrets, and wires everything up end-to-end. -```bash -bash scripts/apply-connection-mode.sh mcp -``` +#### Manual setup -Full reference: [`apps/claude-code-plugin/README.md`](apps/claude-code-plugin/README.md). +Prefer to wire it by hand? See the full walkthrough — environment variables, MCP mode, the `remember` / `recall` skills, Windows hooks, troubleshooting, and uninstall — in **[docs/integrations/claude_code.md](docs/integrations/claude_code.md)**. 
### Cursor, VS Code, Codex, Windsurf, GitHub Copilot @@ -107,7 +106,7 @@ The same extension also provides **Query memories**, **Add selection to memory** ### Any MCP client (Claude Desktop, Cline, …) ```bash -uvx powermem-mcp sse # SSE on :8000 (recommended) +uvx powermem-mcp sse # SSE on :8848 (recommended) uvx powermem-mcp stdio # stdio uvx powermem-mcp streamable-http # streamable HTTP ``` @@ -117,7 +116,7 @@ Client config (Claude Desktop and most MCP clients): ```json { "mcpServers": { - "powermem": { "url": "http://localhost:8000/mcp" } + "powermem": { "url": "http://localhost:8848/mcp" } } } ``` @@ -148,7 +147,7 @@ End-to-end runnable demos: ## Quick start (Python SDK) -**Prerequisites:** Copy [.env.example](.env.example) to `.env` and set **LLM** and **embedding** credentials. The default database is SQLite; OceanBase can use **embedded SeekDB** without running a separate database service. After install, `pmem config init` walks you through the same setup interactively. See [Getting started](docs/guides/0001-getting_started.md). +**Prerequisites:** Copy [.env.example](.env.example) to `.env` and set your **LLM** API key — that is the only required credential. The default storage is the **OceanBase** provider with no host configured, which boots **embedded seekdb** on disk (same engine, no separate server, data under `./seekdb_data`); set `OCEANBASE_HOST` to point at a remote OceanBase cluster instead, or switch to `sqlite` / `postgres`. The default embedder is a local `all-MiniLM-L6-v2` model (384 dims) that needs no API key and auto-downloads on first use. Need to tune providers or unlock advanced features? Copy [.env.example.full](.env.example.full) instead — it documents every available knob, grouped by component. After install, `pmem config init` walks you through the same setup interactively. See [Getting started](docs/guides/0001-getting_started.md). ### Install @@ -204,7 +203,7 @@ Full reference: [CLI usage](docs/guides/0012-cli_usage.md). 
Uses the same `.env` as the SDK. Dashboard is served under `/dashboard/`. ```bash -powermem-server --host 0.0.0.0 --port 8000 +powermem-server --host 0.0.0.0 --port 8848 ``` Docker / Compose: see [API Server](docs/api/0005-api_server.md) and [Docker & deployment](docker/README.md). The official image is `oceanbase/powermem-server:latest`. @@ -226,7 +225,7 @@ Docker / Compose: see [API Server](docs/api/0005-api_server.md) and [Docker & de | LLM | Anthropic, OpenAI, Azure OpenAI, Gemini, Qwen (+ ASR), DeepSeek, Ollama, vLLM, SiliconFlow, Z.AI, LangChain-wrapped | | Embedding | OpenAI, Azure OpenAI, Qwen (+ VL multimodal, sparse), Gemini, Vertex AI, AWS Bedrock, Ollama, LM Studio, HuggingFace, Together, SiliconFlow, Z.AI, OceanBase MASS, LangChain-wrapped | | Rerank | Jina, Qwen, Z.AI, generic | -| Storage | OceanBase (+ graph), embedded SeekDB, PostgreSQL/pgvector, SQLite | +| Storage | OceanBase (+ graph), embedded seekdb, PostgreSQL/pgvector, SQLite | --- @@ -239,6 +238,7 @@ Docker / Compose: see [API Server](docs/api/0005-api_server.md) and [Docker & de - [CLI](docs/guides/0012-cli_usage.md) — `pmem` commands, interactive shell, backup and migration - [Multi-agent](docs/guides/0005-multi_agent.md) — scopes, isolation, and cross-agent sharing - [Integrations](docs/guides/0009-integrations.md) — LangChain and other framework wiring +- [Ecosystem integrations](docs/integrations/overview.md) — AI clients & IDEs ([Claude Code](docs/integrations/claude_code.md), …) - [Docker & deployment](docker/README.md) — images, Compose, and running the API server - [Development](docs/development/overview.md) — local setup, tests, and contributing @@ -254,7 +254,7 @@ More topics: [Sub stores](docs/guides/0006-sub_stores.md), [guides index](docs/g | Version | Date | Notes | |---------|------|--------| | 1.2.0 | 2026-04 | Experience + Skill two-layer distillation and `distill_all()` (self-evolving memory; AppWorld +15 pts); OB MASS embedding; Qwen VL multimodal embedding; OceanBase 
Zero Mode compatibility; LOCOMO accuracy lifted to 87.79% | -| 1.1.0 | 2026-04-02 | Embedded SeekDB for OceanBase storage without a separate database service; [IDE integrations](apps/README.md) (VS Code extension, Claude Code plugin) | +| 1.1.0 | 2026-04-02 | Embedded seekdb for OceanBase storage without a separate database service; [IDE integrations](apps/README.md) (VS Code extension, Claude Code plugin) | | 1.0.0 | 2026-03-16 | CLI (`pmem`): memory ops, config, backup/restore/migrate, interactive shell, completions; Web Dashboard | | 0.5.0 | 2026-02-06 | Unified SDK/API config (pydantic-settings); OceanBase native hybrid search; memory query + list sorting; user-profile language customization | | 0.4.0 | 2026-01-20 | Sparse vectors for hybrid retrieval; profile-based query rewriting; schema upgrade & migration tools | diff --git a/README_CN.md b/README_CN.md index c6025ec2..e225249f 100644 --- a/README_CN.md +++ b/README_CN.md @@ -68,27 +68,51 @@ openclaw plugins install memory-powermem ### Claude Code +五步即可把 Claude Code 接入 PowerMem。默认是 **HTTP 模式** —— 记忆通过 hook 静默工作,对话里不出现工具;需要显式工具时再切到 MCP 模式(最后一步)。 + +**第 1 步 —— 启动 PowerMem 后端。** 把 [.env.example](.env.example) 复制为 `.env`,填好 LLM API key,然后启动 HTTP 服务。hook 默认连 `http://localhost:8848`。 + +```bash +powermem-server --host 0.0.0.0 --port 8848 +``` + +**第 2 步 —— 获取插件并编译 hook 二进制。** hook 以原生二进制分发,因此运行 Claude Code 的机器**无需 Python**。用 Go 1.22+ 编译一次即可(或执行 `make package-claude-plugin`,它会在打包前自动编译)。 + +```bash +git clone https://github.com/oceanbase/powermem +cd powermem +make build-claude-hook # 产物在 apps/claude-code-plugin/hooks/bin/ +``` + +**第 3 步 —— 把插件加载进 Claude Code。** + ```bash -# 从本仓库直接加载(开发/调试推荐) claude --plugin-dir /path/to/powermem/apps/claude-code-plugin +``` -# 或者打包成 zip 发到目标机器,再 --plugin-dir 指向解压目录 -make package-claude-plugin # 产物:apps/claude-code-plugin/dist/.zip +**第 4 步 —— (可选)指向团队服务并设置身份。** 本地服务用默认值即可;连远程服务时按需覆盖: + +```bash +export POWERMEM_BASE_URL=https://powermem.example.com # 默认:http://localhost:8848 
+export POWERMEM_API_KEY=... # 仅当服务端开启鉴权时 +export POWERMEM_USER_ID=alice # 默认:操作系统登录名 ``` -默认 **HTTP 模式**,开箱即用: +**第 5 步 —— 开始使用。** 默认 HTTP 模式下无需任何额外操作: + +- `UserPromptSubmit` → `POST /api/v1/memories/search`,命中结果通过 `additionalContext` 注入当前对话(设 `POWERMEM_PROMPT_SEARCH=0` 可按轮关闭); +- `SessionEnd` / `PostCompact` → `POST /api/v1/memories`,把整段对话或压缩摘要写回记忆。 -- `UserPromptSubmit` → `POST /api/v1/memories/search`,命中结果通过 `additionalContext` 注入当前对话; -- `SessionEnd` / `PostCompact` → `POST /api/v1/memories`,把整段对话或压缩摘要写回记忆; -- 终端机器**无需 Python**,hook 是预编译的原生二进制(macOS / Linux / Windows)。 +**验证:** 结束会话(或执行 `/compact`)后,在服务端日志中查看是否有 `POST /api/v1/memories`;在 Claude Code 里输入 `/hooks` 确认这些 hook 已注册。 -如果想让 Claude 在对话中显式调用 `search_memories` / `add_memory` 工具,切到 **MCP 模式** 即可: +**可选 —— MCP 模式** 会额外提供对话内的 `search_memories` / `add_memory` 工具,以及 `/memory-powermem:remember` 与 `recall` 技能: ```bash -bash scripts/apply-connection-mode.sh mcp +cd apps/claude-code-plugin +bash scripts/apply-connection-mode.sh mcp # 之后重启 Claude Code ``` -完整说明:[`apps/claude-code-plugin/README.md`](apps/claude-code-plugin/README.md)。 +完整说明:[Claude Code 集成指南](docs/integrations/claude_code.md) · [`apps/claude-code-plugin/README.md`](apps/claude-code-plugin/README.md)。 ### Cursor / VS Code / Codex / Windsurf / GitHub Copilot @@ -107,7 +131,7 @@ bash scripts/apply-connection-mode.sh mcp ### 任意 MCP 客户端(Claude Desktop、Cline……) ```bash -uvx powermem-mcp sse # SSE,默认 :8000(推荐) +uvx powermem-mcp sse # SSE,默认 :8848(推荐) uvx powermem-mcp stdio # stdio uvx powermem-mcp streamable-http # streamable HTTP ``` @@ -117,7 +141,7 @@ Claude Desktop / 多数 MCP 客户端的配置: ```json { "mcpServers": { - "powermem": { "url": "http://localhost:8000/mcp" } + "powermem": { "url": "http://localhost:8848/mcp" } } } ``` @@ -148,7 +172,7 @@ pip install powermem langchain langchain-openai ## 快速开始(Python SDK) -**前置条件:** 将 [.env.example](.env.example) 复制为 `.env`,配置 **LLM** 与 **向量嵌入** 凭证。默认数据库是 SQLite;OceanBase 后端可使用 **嵌入式 SeekDB**,不必额外部署数据库进程。安装后执行 `pmem 
config init` 可交互式生成同样的配置。详见 [入门指南](docs/guides/0001-getting_started.md)。 +**前置条件:** 将 [.env.example](.env.example) 复制为 `.env`,仅需配置 **LLM** 的 API key。默认存储是 **OceanBase** provider 且未配置 host,会自动启动 **嵌入式 seekdb**(同一引擎与 SQL,无需额外数据库进程,数据落在 `./seekdb_data`);如需连接远端 OceanBase 集群,设置 `OCEANBASE_HOST` 即可,也可改用 `sqlite` 或 `postgres`。默认 embedder 是本地的 `all-MiniLM-L6-v2`(384 维),无需 API key,首次使用时自动下载。如需调整 provider 或开启高级特性,可改用 [.env.example.full](.env.example.full),其中按组件分组记录了所有可调参数。安装后执行 `pmem config init` 可交互式生成同样的配置。详见 [入门指南](docs/guides/0001-getting_started.md)。 ### 安装 @@ -204,7 +228,7 @@ pmem shell # 交互式 REPL 与 SDK 共用 `.env`,Dashboard 路径 `/dashboard/`。 ```bash -powermem-server --host 0.0.0.0 --port 8000 +powermem-server --host 0.0.0.0 --port 8848 ``` Docker / Compose 部署见 [API Server](docs/api/0005-api_server.md) 与 [Docker 说明](docker/README.md)。官方镜像:`oceanbase/powermem-server:latest`。 @@ -226,7 +250,7 @@ Docker / Compose 部署见 [API Server](docs/api/0005-api_server.md) 与 [Docker | LLM | Anthropic、OpenAI、Azure OpenAI、Gemini、Qwen(+ ASR 语音)、DeepSeek、Ollama、vLLM、SiliconFlow、Z.AI、LangChain 包装层 | | Embedding | OpenAI、Azure OpenAI、Qwen(+ VL 多模态、稀疏向量)、Gemini、Vertex AI、AWS Bedrock、Ollama、LM Studio、HuggingFace、Together、SiliconFlow、Z.AI、OceanBase MASS、LangChain 包装层 | | Rerank | Jina、Qwen、Z.AI、通用接口 | -| Storage | OceanBase(含图存储)、嵌入式 SeekDB、PostgreSQL/pgvector、SQLite | +| Storage | OceanBase(含图存储)、嵌入式 seekdb、PostgreSQL/pgvector、SQLite | --- @@ -239,6 +263,7 @@ Docker / Compose 部署见 [API Server](docs/api/0005-api_server.md) 与 [Docker - [CLI 使用指南](docs/guides/0012-cli_usage.md) — `pmem`、交互 Shell、备份与迁移 - [多智能体](docs/guides/0005-multi_agent.md) — 作用域、隔离与跨智能体共享 - [集成说明](docs/guides/0009-integrations.md) — LangChain 等框架接入 +- [生态集成](docs/integrations/overview.md) — AI 客户端与 IDE([Claude Code](docs/integrations/claude_code.md) 等) - [Docker 与部署](docker/README.md) — 镜像、Compose、运行 API 服务 - [开发说明](docs/development/overview.md) — 本地开发、测试与贡献 @@ -254,7 +279,7 @@ Docker / Compose 部署见 [API 
Server](docs/api/0005-api_server.md) 与 [Docker | 版本 | 日期 | 说明 | |------|------|------| | 1.2.0 | 2026-04 | Experience + Skill 双层蒸馏与 `distill_all()`(自进化记忆,AppWorld +15 pts);OB MASS Embedding;Qwen VL 多模态 Embedding;OceanBase Zero Mode 兼容;LOCOMO 准确率提升至 87.79% | -| 1.1.0 | 2026-04-02 | OceanBase 存储支持嵌入式 SeekDB,无需单独部署数据库服务;[IDE 集成](apps/README.md)(VS Code 扩展、Claude Code 插件) | +| 1.1.0 | 2026-04-02 | OceanBase 存储支持嵌入式 seekdb,无需单独部署数据库服务;[IDE 集成](apps/README.md)(VS Code 扩展、Claude Code 插件) | | 1.0.0 | 2026-03-16 | CLI(`pmem`):记忆操作、配置、备份/恢复/迁移、交互 Shell、补全;Web Dashboard | | 0.5.0 | 2026-02-06 | SDK/API 统一配置(pydantic-settings);OceanBase 原生混合检索;记忆查询与列表排序;用户画像输出语言定制 | | 0.4.0 | 2026-01-20 | 稀疏向量混合检索;基于画像的查询改写;表结构升级与迁移工具 | diff --git a/README_JP.md b/README_JP.md index bce942eb..2d36539e 100644 --- a/README_JP.md +++ b/README_JP.md @@ -68,27 +68,51 @@ openclaw plugins install memory-powermem ### Claude Code +5 ステップで Claude Code を PowerMem に接続します。デフォルトは **HTTP モード** — メモリはフック経由で静かに動作し、会話中にツールは現れません。明示的なツールが必要な場合は最後のステップで MCP モードに切り替えます。 + +**ステップ 1 — PowerMem バックエンドを起動。** [.env.example](.env.example) を `.env` にコピーし、LLM API キーを設定してから HTTP サーバーを起動します。フックのデフォルト接続先は `http://localhost:8848` です。 + +```bash +powermem-server --host 0.0.0.0 --port 8848 +``` + +**ステップ 2 — プラグインを取得し、フックのバイナリをビルド。** フックはネイティブバイナリとして配布されるため、Claude Code を動かすマシンに **Python は不要** です。Go 1.22+ で一度ビルドします(`make package-claude-plugin` でも zip 化の前に自動ビルドされます)。 + +```bash +git clone https://github.com/oceanbase/powermem +cd powermem +make build-claude-hook # 生成物: apps/claude-code-plugin/hooks/bin/ +``` + +**ステップ 3 — プラグインを Claude Code に読み込む。** + ```bash -# 本リポジトリから直接ロード(開発/デバッグ向け) claude --plugin-dir /path/to/powermem/apps/claude-code-plugin +``` -# あるいは zip にパッケージして配布し、解凍後のディレクトリを --plugin-dir に指定 -make package-claude-plugin # 生成物: apps/claude-code-plugin/dist/.zip +**ステップ 4 — (任意) チームサーバーを指定し、ID を設定。** ローカルサーバーならデフォルトのままで構いません。リモートの場合は上書きします: + +```bash +export POWERMEM_BASE_URL=https://powermem.example.com # デフォルト: 
http://localhost:8848 +export POWERMEM_API_KEY=... # サーバーが認証を要求する場合のみ +export POWERMEM_USER_ID=alice # デフォルト: OS のログイン名 ``` -デフォルトは **HTTP モード**、即利用可能: +**ステップ 5 — 利用開始。** デフォルトの HTTP モードでは追加の操作は不要です: -- `UserPromptSubmit` → `POST /api/v1/memories/search`、上位結果が `additionalContext` として現在の会話に注入されます。 +- `UserPromptSubmit` → `POST /api/v1/memories/search`、上位結果が `additionalContext` として現在の会話に注入されます(`POWERMEM_PROMPT_SEARCH=0` でターンごとに無効化可能)。 - `SessionEnd` / `PostCompact` → `POST /api/v1/memories`、会話全体または圧縮サマリをメモリへ書き戻します。 -- 端末側に **Python は不要** — フックは事前ビルド済みのネイティブバイナリ(macOS / Linux / Windows)として配布されます。 -Claude が会話中に `search_memories` / `add_memory` ツールを明示的に呼び出す **MCP モード** に切り替えることもできます: +**確認:** セッションを終了(または `/compact` を実行)した後、サーバーログに `POST /api/v1/memories` が出ているか確認します。Claude Code で `/hooks` と入力すると、これらのフックが登録されているか確認できます。 + +**任意 — MCP モード** では、会話中の `search_memories` / `add_memory` ツールと `/memory-powermem:remember` ・ `recall` スキルが追加されます: ```bash -bash scripts/apply-connection-mode.sh mcp +cd apps/claude-code-plugin +bash scripts/apply-connection-mode.sh mcp # その後 Claude Code を再起動 ``` -詳細は [`apps/claude-code-plugin/README.md`](apps/claude-code-plugin/README.md) を参照してください。 +詳細は [Claude Code 連携ガイド](docs/integrations/claude_code.md) · [`apps/claude-code-plugin/README.md`](apps/claude-code-plugin/README.md) を参照してください。 ### Cursor / VS Code / Codex / Windsurf / GitHub Copilot @@ -107,7 +131,7 @@ bash scripts/apply-connection-mode.sh mcp ### 任意の MCP クライアント(Claude Desktop、Cline ……) ```bash -uvx powermem-mcp sse # SSE、デフォルト :8000(推奨) +uvx powermem-mcp sse # SSE、デフォルト :8848(推奨) uvx powermem-mcp stdio # stdio uvx powermem-mcp streamable-http # streamable HTTP ``` @@ -117,7 +141,7 @@ Claude Desktop / 多くの MCP クライアント向けの設定: ```json { "mcpServers": { - "powermem": { "url": "http://localhost:8000/mcp" } + "powermem": { "url": "http://localhost:8848/mcp" } } } ``` @@ -148,7 +172,7 @@ pip install powermem langchain langchain-openai ## クイックスタート(Python SDK) -**前提:** 
[.env.example](.env.example) を `.env` にコピーし、**LLM** と **埋め込み(embedding)** を設定してください。デフォルト DB は SQLite。OceanBase バックエンドでは **埋め込み SeekDB** を使えるため、別途データベースを立ち上げる必要はありません。インストール後は `pmem config init` で対話的に同じ設定を生成できます。詳しくは [はじめに](docs/guides/0001-getting_started.md) を参照してください。 +**前提:** [.env.example](.env.example) を `.env` にコピーし、**LLM** の API キーだけを設定してください。デフォルトのストレージは **OceanBase** プロバイダで host 未設定の状態 — つまり **埋め込み seekdb**(同じエンジン・SQL、別プロセスのデータベース不要、データは `./seekdb_data` に保存)を自動起動します。リモートの OceanBase クラスタに接続したい場合は `OCEANBASE_HOST` を設定するだけで、`sqlite` や `postgres` も選択可能です。デフォルトの埋め込みモデルはローカル実行の `all-MiniLM-L6-v2`(384 次元)で、API キー不要・初回利用時に自動ダウンロードされます。プロバイダ切り替えや高度な設定が必要な場合は [.env.example.full](.env.example.full) をコピーしてください。コンポーネントごとに全ての設定項目がまとめられています。インストール後は `pmem config init` で対話的に同じ設定を生成できます。詳しくは [はじめに](docs/guides/0001-getting_started.md) を参照してください。 ### インストール @@ -204,7 +228,7 @@ pmem shell # 対話 REPL SDK と同じ `.env` を使用。Dashboard は `/dashboard/` 以下に提供されます。 ```bash -powermem-server --host 0.0.0.0 --port 8000 +powermem-server --host 0.0.0.0 --port 8848 ``` Docker / Compose は [API Server](docs/api/0005-api_server.md) と [Docker README](docker/README.md) を参照。公式イメージ: `oceanbase/powermem-server:latest`。 @@ -226,7 +250,7 @@ Docker / Compose は [API Server](docs/api/0005-api_server.md) と [Docker READM | LLM | Anthropic、OpenAI、Azure OpenAI、Gemini、Qwen(+ ASR)、DeepSeek、Ollama、vLLM、SiliconFlow、Z.AI、LangChain ラッパー | | Embedding | OpenAI、Azure OpenAI、Qwen(+ VL マルチモーダル、スパース)、Gemini、Vertex AI、AWS Bedrock、Ollama、LM Studio、HuggingFace、Together、SiliconFlow、Z.AI、OceanBase MASS、LangChain ラッパー | | Rerank | Jina、Qwen、Z.AI、汎用 | -| Storage | OceanBase(+ グラフ)、埋め込み SeekDB、PostgreSQL/pgvector、SQLite | +| Storage | OceanBase(+ グラフ)、埋め込み seekdb、PostgreSQL/pgvector、SQLite | --- @@ -239,6 +263,7 @@ Docker / Compose は [API Server](docs/api/0005-api_server.md) と [Docker READM - [CLI](docs/guides/0012-cli_usage.md) — `pmem` コマンド、対話シェル、バックアップとマイグレーション - [マルチエージェント](docs/guides/0005-multi_agent.md) — 
スコープ、分離、エージェント間共有 - [連携](docs/guides/0009-integrations.md) — LangChain などフレームワーク連携 +- [エコシステム連携](docs/integrations/overview.md) — AI クライアントと IDE([Claude Code](docs/integrations/claude_code.md) など) - [Docker とデプロイ](docker/README.md) — イメージ、Compose、API サーバーの実行 - [開発](docs/development/overview.md) — ローカル環境、テスト、コントリビューション @@ -254,7 +279,7 @@ Docker / Compose は [API Server](docs/api/0005-api_server.md) と [Docker READM | バージョン | 日付 | 内容 | |------------|------|------| | 1.2.0 | 2026-04 | 経験 + スキル 二層蒸留と `distill_all()`(自己進化型メモリ、AppWorld +15 pts);OB MASS Embedding;Qwen VL マルチモーダル Embedding;OceanBase Zero Mode 互換;LOCOMO 精度を 87.79% に引き上げ | -| 1.1.0 | 2026-04-02 | OceanBase 向けに埋め込み SeekDB(別途 DB サービス不要);[IDE 連携](apps/README.md)(VS Code 拡張、Claude Code プラグイン) | +| 1.1.0 | 2026-04-02 | OceanBase 向けに埋め込み seekdb(別途 DB サービス不要);[IDE 連携](apps/README.md)(VS Code 拡張、Claude Code プラグイン) | | 1.0.0 | 2026-03-16 | CLI(`pmem`):メモリ操作、設定、バックアップ/復元/マイグレーション、対話シェル、補完;Web Dashboard | | 0.5.0 | 2026-02-06 | SDK/API 設定の統一(pydantic-settings);OceanBase native hybrid search;メモリクエリと一覧ソート;プロフィールの言語カスタマイズ | | 0.4.0 | 2026-01-20 | スパースベクトル混合検索;プロフィール起点のクエリ書き換え;スキーマ更新と移行ツール | diff --git a/apps/README.md b/apps/README.md index 6a63e1a7..ae7e90a7 100644 --- a/apps/README.md +++ b/apps/README.md @@ -9,7 +9,7 @@ ## Quick start -1. **Backend**: Start PowerMem (e.g. `powermem-server --port 8000` or `uvx powermem-mcp sse`). +1. **Backend**: Start PowerMem (e.g. `powermem-server --port 8848` or `uvx powermem-mcp sse`). 2. **VS Code / Cursor**: Install the extension from `vscode-extension/` (Run and Debug or package as `.vsix`), set backend URL in PowerMem settings, then use **PowerMem: Link to AI tools**. 3. **Claude Code only**: `claude --plugin-dir /path/to/powermem/apps/claude-code-plugin`. **HTTP mode is default**; run `scripts/apply-connection-mode.sh mcp` for in-chat tools (see plugin README). 
diff --git a/apps/claude-code-plugin/.claude-plugin/marketplace.json b/apps/claude-code-plugin/.claude-plugin/marketplace.json new file mode 100644 index 00000000..db203b4c --- /dev/null +++ b/apps/claude-code-plugin/.claude-plugin/marketplace.json @@ -0,0 +1,20 @@ +{ + "name": "powermem", + "owner": { + "name": "OceanBase / PowerMem", + "email": "open_oceanbase@oceanbase.com" + }, + "metadata": { + "description": "PowerMem marketplace: intelligent memory for Claude Code (Ebbinghaus decay, multi-agent, HTTP hooks).", + "version": "0.1.0" + }, + "plugins": [ + { + "name": "memory-powermem", + "source": "./", + "description": "PowerMem intelligent memory for Claude Code: auto-recall on prompt submit and auto-save on session end via HTTP hooks. Ebbinghaus decay and multi-agent support.", + "category": "memory", + "tags": ["memory", "powermem", "rag", "hooks", "oceanbase"] + } + ] +} diff --git a/apps/claude-code-plugin/CHANGELOG.md b/apps/claude-code-plugin/CHANGELOG.md index 78d737d5..70c29ee8 100644 --- a/apps/claude-code-plugin/CHANGELOG.md +++ b/apps/claude-code-plugin/CHANGELOG.md @@ -10,7 +10,7 @@ Initial release of the PowerMem plugin for Claude Code. **Connection modes** -- **HTTP mode (default):** Root `.mcp.json` ships with empty `mcpServers`; no PowerMem MCP tools in chat. Hooks always call the PowerMem REST API (`POWERMEM_BASE_URL`, default `http://localhost:8000`). +- **HTTP mode (default):** Root `.mcp.json` ships with empty `mcpServers`; no PowerMem MCP tools in chat. Hooks always call the PowerMem REST API (`POWERMEM_BASE_URL`, default `http://localhost:8848`). - **MCP mode (optional):** `scripts/apply-connection-mode.sh mcp` copies `config/mcp-mode.mcp.json` to `.mcp.json` so Claude can use PowerMem MCP (`search_memories`, `add_memory`, etc.) over HTTP `/mcp` or stdio. 
**Skills** diff --git a/apps/claude-code-plugin/README.md b/apps/claude-code-plugin/README.md index a5853192..11a15f92 100644 --- a/apps/claude-code-plugin/README.md +++ b/apps/claude-code-plugin/README.md @@ -1,233 +1,42 @@ # PowerMem Plugin for Claude Code -Claude Code plugin that connects to [PowerMem](https://github.com/oceanbase/powermem) for intelligent, persistent memory. +The full Claude Code integration guide — the auto-setup prompt, manual steps, the +two connection modes (HTTP / MCP), hooks, configuration, troubleshooting, and +uninstall — now lives in the docs and is the single source of truth: -## Features +**➡ [docs/integrations/claude_code.md](../../docs/integrations/claude_code.md)** -- **Two connection modes** (aligned with the PowerMem VS Code extension). **HTTP mode is the default** (standard): REST-only via hooks, no PowerMem MCP tools in chat. **MCP mode** is optional when you want `search_memories` / `add_memory` in the conversation. See [Configuration](#configuration). -- **HTTP mode (default)**: Root `.mcp.json` ships with empty `mcpServers`. Hooks use **`POST /api/v1/memories`** (`POWERMEM_BASE_URL`, default `http://localhost:8000`). -- **MCP mode (optional)**: Copy [`config/mcp-mode.mcp.json`](config/mcp-mode.mcp.json) to `.mcp.json` (or run `apply-connection-mode.sh mcp`). Claude gets PowerMem tools over **HTTP** `…/mcp` or **stdio**. -- **Skills**: `/memory-powermem:remember` and `/memory-powermem:recall` — effective in **MCP mode**; in default HTTP mode they cannot drive tools. -- **Seamless REST capture**: Hooks run in **both** modes. Optional **file poller** — see [watcher/README.md](watcher/README.md). -- **Auto-retrieval (no MCP required, on by default)**: The `UserPromptSubmit` hook calls **`POST /api/v1/memories/search`** with the user’s prompt and injects hits via [`additionalContext`](https://code.claude.com/docs/en/hooks#userpromptsubmit). 
Set **`POWERMEM_PROMPT_SEARCH=0`** (or `false` / `no` / `off`) to disable — saves a search round-trip per turn. Works in **HTTP and MCP** modes. - -## Runtime requirements (end users) - -| Piece | Needs Python? | Notes | -|--------|----------------|-------| -| Claude Code | No | | -| MCP tools | No | **Off by default** (HTTP mode). Run `apply-connection-mode.sh mcp` to enable. | -| **Hooks** (transcript / compact → HTTP API) | **No** | Native binaries under `hooks/bin/` + `run-hook.sh` (macOS/Linux) or PowerShell on Windows. **`POWERMEM_BASE_URL` defaults to `http://localhost:8000`.** | -| Optional **file poller** | No | Same binary: `sh hooks/run-hook.sh poll` — see [watcher/README.md](watcher/README.md). | - -**macOS / Linux:** default `hooks/hooks.json` runs `sh …/run-hook.sh`. POSIX `sh` is always present. - -**Windows (native, no Git Bash):** if `sh` is missing, merge the commands from [`hooks/hooks.windows.example.json`](hooks/hooks.windows.example.json) into your Claude `settings.json` so hooks call `powershell.exe -File …/run-hook.ps1`. The zip includes `hooks/bin/powermem-hook-windows-amd64.exe` (add `windows/arm64` to the build script if you need it). - -**Rebuilding binaries** (developers / CI): Go **1.22+**, then `bash scripts/build-hook-binaries.sh` or `make build-claude-hook` from the repo root. `make package-claude-plugin` builds them automatically before zipping. - -## Prerequisites - -1. **PowerMem HTTP API** reachable from the machine running Claude (e.g. `powermem-server --port 8000`). Default hooks use **`http://localhost:8000`** — override with `POWERMEM_BASE_URL` for a remote server. -2. **MCP mode only:** additionally expose MCP (same host, usually `/mcp`) or stdio `powermem-mcp`, and switch `.mcp.json` via [`config/mcp-mode.mcp.json`](config/mcp-mode.mcp.json). -3. **Claude Code** (VS Code extension or CLI) with plugin support. 
- -## Installation - -### Option A: Load from directory (development) +This directory still contains the plugin itself (`.claude-plugin/`, `hooks/`, +`skills/`, `config/`, `.mcp.json`). To load it: ```bash claude --plugin-dir /path/to/powermem/apps/claude-code-plugin ``` -### Option B: Install from marketplace - -If this plugin is published to a Claude Code plugin marketplace, install it from there. +## Troubleshooting — Error Handling Prompt -### Option C: Pack and copy to another machine (offline / internal) +When PowerMem encounters an issue (memory writes fail, search returns nothing, +hooks aren't firing, or the server crashes), the server logs everything to +`/tmp/powermem-server.log`. Add this to your Claude Code prompt to self-diagnose: -From the **powermem repo root**: - -```bash -make package-claude-plugin ``` - -Or run the script directly: - -```bash -bash apps/claude-code-plugin/scripts/package-plugin.sh -``` - -This writes **`apps/claude-code-plugin/dist/powermem-claude-code-plugin-.zip`**. Share that zip (USB, internal artifact server, etc.). - -**On the other computer:** - -1. Unzip → you get a folder `powermem-claude-code-plugin/` containing `.claude-plugin/`, `hooks/`, `skills/`, `.mcp.json`, etc. -2. Point Claude Code at that folder (absolute path recommended): - - ```bash - # Optional: hooks default to http://localhost:8000 if POWERMEM_BASE_URL is unset - export POWERMEM_BASE_URL=https://your-team-powermem.example.com # team server only - claude --plugin-dir /path/to/powermem-claude-code-plugin - ``` - -3. Requirements on that machine: **no Python**; use **macOS/Linux** `sh` or follow **Windows** PowerShell hooks above. **HTTP API** must be reachable for hooks (and `/mcp` too if you enable MCP mode). - -To publish a zip **with MCP enabled by default**, replace root `.mcp.json` with `config/mcp-mode.mcp.json` before `make package-claude-plugin`, or document that users run `apply-connection-mode.sh mcp`. 
- -## Uninstall and update - -### Uninstall - -How you remove the plugin depends on how you enabled it: - -| How you installed | What to do | -|-------------------|------------| -| **`claude --plugin-dir /path/to/...`** | Stop passing `--plugin-dir` (remove it from shell aliases, scripts, or IDE task). Optionally delete the plugin folder. Nothing is left in `~/.claude` **unless** you also changed global settings (see below). | -| **Zip / copied folder** | Delete the unzipped directory. Stop using `--plugin-dir` pointing at it. | -| **Git clone / repo path** | Stop using `--plugin-dir` for that path; remove the clone if you no longer need it. | -| **Marketplace / built-in plugin UI** | Disable or uninstall **memory-powermem** (or the listed name) in Claude Code’s plugin settings. Follow [Claude Code plugins](https://code.claude.com/docs/en/plugins) for the exact UI or CLI your version provides. | -| **You merged [`hooks/hooks.windows.example.json`](hooks/hooks.windows.example.json) into `settings.json`** | Edit `~/.claude/settings.json` or `.claude/settings.json` in the project and remove the `UserPromptSubmit` / `SessionEnd` / `PostCompact` hook entries that call `run-hook.ps1` (or restore a backup). Otherwise hooks keep running even after the plugin folder is deleted. | - -The hook binary only **writes** to your PowerMem server; it does not install a system daemon. No separate “service uninstall” is required. - -### Update - -| Install style | Update steps | -|---------------|--------------| -| **Zip** | Download the new `.zip`, replace the old folder (delete the previous `powermem-claude-code-plugin` tree, unzip the new one to the same or a new path), then start Claude with `--plugin-dir` pointing at the new folder. | -| **Repo / `git`** | `git pull` (or fetch the release you want), run `make package-claude-plugin` or `bash scripts/package-plugin.sh` if you need a fresh zip, then restart Claude Code. 
| -| **Marketplace** | Use “update” / reinstall from the marketplace when your team publishes a new version. | - -After updating, restart the Claude Code session (or the whole app) so MCP config, skills, and hooks reload. - -## Configuration - -### Two PowerMem modes (HTTP default, MCP optional) - -Same **MCP / HTTP** split as elsewhere in PowerMem. **Standard shipping = HTTP mode**: root `.mcp.json` has **`mcpServers: {}`**. **Hooks always use REST** in both modes. - -| Mode | Plugin root `.mcp.json` | Claude in-chat | Silent capture (hooks → REST) | -|------|-------------------------|----------------|--------------------------------| -| **HTTP mode (default)** | Empty `mcpServers` — same as [`config/http-mode.mcp.json`](config/http-mode.mcp.json) | No PowerMem MCP tools | Yes (`POWERMEM_BASE_URL`, default `http://localhost:8000`) | -| **MCP mode** | Includes `powermem` — [`config/mcp-mode.mcp.json`](config/mcp-mode.mcp.json) | Yes — `search_memories`, `add_memory`, … | Yes | - -**Switch mode** (from the plugin directory): - -```bash -bash scripts/apply-connection-mode.sh http # restore standard (default) HTTP-only mode -bash scripts/apply-connection-mode.sh mcp # enable in-chat PowerMem tools +If powermem-server has an error (HTTP 503, 500, or the hook calls fail): +1. Read /tmp/powermem-server.log and look for ERROR-level lines or stack traces +2. Common issues and fixes: + - "Vector dimension mismatch" → old seekdb data uses a different embedder + dimension. Delete the seekdb_data/ directory and restart: + rm -rf ./seekdb_data && powermem-server --host 0.0.0.0 --port 8848 + - "libomp.dylib already initialized" (OMP Error #15) → multiple OpenMP + runtimes loaded. Restart with: + KMP_DUPLICATE_LIB_OK=TRUE powermem-server --host 0.0.0.0 --port 8848 + - "Connection timed out" downloading HF models → Hugging Face unreachable. 
+ Use the China mirror: + HF_ENDPOINT=https://hf-mirror.com powermem-server --host 0.0.0.0 --port 8848 + - "open seekdb failed" / "opened by other process" → a stale lock file. + Kill all powermem-server processes, then clean and restart (warning: removing ./seekdb_data deletes all locally stored memories): + pkill -9 -f powermem-server && rm -rf ./seekdb_data && powermem-server --host 0.0.0.0 --port 8848 + - "ModuleNotFoundError" → missing pip dependency. Install it: + pip install <missing-module> +3. After applying a fix, re-run the failing operation and confirm the log shows + the request succeeded (200 status, no ERROR lines). ``` - -Restart Claude Code after changing `.mcp.json`. See [`config/README.md`](config/README.md). - -**Naming note:** In **MCP mode**, `transport: "http"` means “connect to the **MCP** endpoint over HTTP” (`https://host/mcp`), not “replace MCP with REST.” **HTTP mode** means “no MCP entry for PowerMem”; REST is still used by hooks. - -### MCP mode: team or local URL - -After `apply-connection-mode.sh mcp`, edit `.mcp.json` or `config/mcp-mode.mcp.json` before copying. Same host as your REST API, MCP path is usually `/mcp`: - -```json -{ - "mcpServers": { - "powermem": { - "transport": "http", - "url": "https://powermem.example.com/mcp" - } - } -} -``` - -**stdio MCP** (local `powermem-mcp` process) — in **MCP mode**, replace the `powermem` block with: - -```json -{ - "mcpServers": { - "powermem": { - "transport": "stdio", - "command": "uvx", - "args": ["powermem-mcp", "stdio"] - } - } -} -``` - -Ensure PowerMem is installed (`pip install powermem`) and a `.env` is available when using stdio. - -### HTTP mode: REST only (standard) - -This is the **default** root `.mcp.json`. Claude has **no** PowerMem MCP tools; skills that reference those tools have nothing to call. **Hooks** still send transcripts / compact summaries to `POST /api/v1/memories`. To reset after trying MCP: `bash scripts/apply-connection-mode.sh http`.
- -### Seamless recording (hooks + HTTP API) - -The plugin ships [`hooks/hooks.json`](hooks/hooks.json), [`hooks/run-hook.sh`](hooks/run-hook.sh), and **native** `hooks/bin/powermem-hook-*` (built from [`cmd/powermem-hook`](cmd/powermem-hook/)). When the plugin is enabled, Claude Code merges these hooks: - -| Hook | What happens | -|------|----------------| -| `UserPromptSubmit` | By default, **`POST …/api/v1/memories/search`** with the submitted `prompt`; top results are injected as **additional context** for that turn ([Claude Code hooks](https://code.claude.com/docs/en/hooks#userpromptsubmit)). Set **`POWERMEM_PROMPT_SEARCH=0`** (or `false` / `no` / `off`) to skip search (hook still registered; overhead is small when disabled). | -| `SessionEnd` | Full **transcript** from `transcript_path` (parsed JSONL: user/assistant/summary lines) → **`POST …/api/v1/memories`**. | -| `PostCompact` | The **`compact_summary`** field after `/compact` or auto-compact → **`POST …/api/v1/memories`**. | - -**Write** hooks use `POST {POWERMEM_BASE_URL}/api/v1/memories`. **Prompt search** uses `POST {POWERMEM_BASE_URL}/api/v1/memories/search`. Neither path requires MCP. - -Optional environment variables (where you launch Claude Code): - -| Variable | Required | Description | -|----------|----------|-------------| -| `POWERMEM_BASE_URL` | No | Defaults to **`http://localhost:8000`** (same host as default `.mcp.json`, without `/mcp`). Set for a team gateway, e.g. `https://powermem.example.com`. 
| -| `POWERMEM_API_KEY` | If server uses auth | Sent as `X-API-Key` | -| `POWERMEM_USER_ID` | No | Defaults to OS login name | -| `POWERMEM_AGENT_ID` | No | Optional `agent_id` on memories | -| `POWERMEM_HOOK_MAX_CHARS` | No | Transcript cap (default `120000`) | -| `POWERMEM_INFER_TRANSCRIPT` | No | Set `1` to enable server-side infer on large transcripts (default off) | -| `POWERMEM_INFER_COMPACT` | No | Set `0` to disable infer on compact summaries (default on) | -| `POWERMEM_PROMPT_SEARCH` | No | **Default: on** — injects semantic search results on every user prompt via `UserPromptSubmit`. Set **`0`** / **`false`** / **`no`** / **`off`** to disable. | -| `POWERMEM_PROMPT_SEARCH_LIMIT` | No | Max memories returned per prompt (default **8**, cap **30**). | -| `POWERMEM_PROMPT_SEARCH_MAX_CHARS` | No | Cap on injected context string (default **24000**). | - -**SessionEnd timeout:** Claude Code defaults to a short timeout for `SessionEnd` hooks. The hook **returns immediately** and uploads in a **detached worker process**, so large transcripts still upload without blocking exit. If you ever switch to a synchronous upload inside the hook, raise `CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS` (see [Claude Code hooks – SessionEnd](https://code.claude.com/docs/en/hooks#sessionend)). - -### Troubleshooting: “no requests” while vibe-coding - -What you see is often **expected**: - -1. **Default HTTP mode** — There are **no** PowerMem MCP tools during chat, so Claude does **not** call `/mcp` on each message. **`POST /api/v1/memories`** (writes) still come from **`SessionEnd`** / **`PostCompact`**, not every reply. By default, **`POST /api/v1/memories/search`** runs **on each user message** via `UserPromptSubmit`; set **`POWERMEM_PROMPT_SEARCH=0`** to turn that off. -2. **Not every hook is per-turn** — `SessionEnd` runs when the **session ends** (quit, `/clear`, `/resume` switch, etc.). `PostCompact` runs after **manual or auto compact**, not after every reply. -3. 
**Those GETs** (`/system/status`, `/memories/stats`, …) usually come from another client (e.g. **PowerMem VS Code extension** dashboard), not from Claude Code hooks. - -**How to verify hooks:** - -- **End the Claude Code session** (exit the CLI session that used `--plugin-dir`), then check server logs for **`POST /api/v1/memories`** (the worker runs shortly after exit). -- Or trigger **`/compact`** (or wait for auto-compact) and look for a compact-summary write. -- In Claude Code, type **`/hooks`** and confirm `UserPromptSubmit` (if present) / `SessionEnd` / `PostCompact` list this plugin’s command (see [hooks menu](https://code.claude.com/docs/en/hooks#the-hooks-menu)). - -**If you want traffic during the conversation:** - -- **`POWERMEM_PROMPT_SEARCH` is on by default**, so each user message triggers **`POST /api/v1/memories/search`** and retrieved memories are **injected automatically** (no MCP tools needed). Set **`POWERMEM_PROMPT_SEARCH=0`** to turn that off. -- Or switch to **MCP mode** (`bash scripts/apply-connection-mode.sh mcp`) so Claude can call memory tools when it chooses — traffic goes to **`/mcp`**, not necessarily the same paths as the dashboard GETs. -- Or rely on **VS Code extension** save capture / `sh hooks/run-hook.sh poll` for file-based writes. - -### Optional: workspace file watcher (CLI / no VS Code) - -If engineers use **Claude Code without** the [PowerMem VS Code extension](../vscode-extension/) (which already **auto-captures on save** against `powermem.backendUrl`), run the native poller: - -```bash -export POWERMEM_BASE_URL=https://powermem.example.com -export POWERMEM_API_KEY=... # if required -export POWERMEM_WATCH_ROOT=/path/to/repo -sh hooks/run-hook.sh poll -``` - -See [watcher/README.md](watcher/README.md) for environment variables. - -## Usage - -- **Default (HTTP mode):** Hooks capture to REST automatically; no PowerMem tools in chat. 
**Per-prompt semantic retrieval is on by default** (see [Seamless recording](#seamless-recording-hooks--http-api)); set **`POWERMEM_PROMPT_SEARCH=0`** to disable. -- **MCP mode:** Run `apply-connection-mode.sh mcp`, then PowerMem tools appear; use **/memory-powermem:remember** / **recall** with real tool backing. Per-prompt injection stays **on by default**; set **`POWERMEM_PROMPT_SEARCH=0`** if you only want explicit MCP tool use. -- In **both** modes, transcript/compact hooks write to REST (`POWERMEM_BASE_URL`, default `http://localhost:8000`) without the model calling tools. - -## Links - -- [PowerMem](https://github.com/oceanbase/powermem) -- [PowerMem MCP docs](https://github.com/oceanbase/powermem/blob/master/docs/api/0004-mcp.md) -- [Claude Code hooks reference](https://code.claude.com/docs/en/hooks) diff --git a/apps/claude-code-plugin/SETUP.md b/apps/claude-code-plugin/SETUP.md new file mode 100644 index 00000000..ccbcfe6c --- /dev/null +++ b/apps/claude-code-plugin/SETUP.md @@ -0,0 +1,363 @@ +# PowerMem — automated Claude Code setup + +This file is a **prompt for Claude Code**. Open Claude Code in your terminal and say: + +> Read and follow `apps/claude-code-plugin/SETUP.md` to set up PowerMem memory for Claude Code. + +Claude Code will then run the steps below: detect whether you are in the PowerMem +source tree or not, ask you for the few required secrets, and wire PowerMem up as a +**globally enabled** plugin so every `claude` session (interactive AND non-interactive +`claude -p`) uses it automatically — no per-session `--plugin-dir` flag. + +--- + +Set up PowerMem memory for Claude Code on this machine **globally**. Do the whole +integration autonomously and ask me for any secret you need — never invent credentials. + +**⚠️ `.env` changes always require user approval.** Before modifying `.env` for any +reason (LLM config, embedder settings, storage switches, ...), show the user the +current values, propose the exact change, and WAIT for confirmation. 
Never patch +`.env` silently. + +This procedure is **idempotent**: it is safe to re-run. Each step must detect existing +state and either skip, reuse, or refresh it instead of failing or duplicating work. + +1. DETECT CONTEXT. The current directory is the PowerMem source tree if a + pyproject.toml here has name = "powermem" (or src/powermem/ and + apps/claude-code-plugin/ both exist). Tell me which path you will take: + - SOURCE -> build & deploy from this checkout and install the Claude Code + plugin GLOBALLY in HTTP mode (hooks -> REST; needs Go 1.22+). + - PIP -> install from PyPI and connect via the powermem-mcp server + (the plugin is NOT on PyPI, so pip users integrate over MCP). + +**⚠️ RULE: Every time you need to modify `.env` — for any reason, even a +single variable — you MUST stop and ask the user what value to use. Show +the current content of the relevant lines, propose the change, and WAIT +for the user's confirmation before writing. Never silently patch `.env`.** + +2. COLLECT CONFIG (idempotent). If a .env already exists in the working directory + with LLM_PROVIDER / LLM_API_KEY / LLM_MODEL set, REUSE it and only ask me about + anything missing. Otherwise ask for: LLM provider (anthropic / openai / qwen / + ...), LLM API key, and LLM model. Use zero-config defaults for the rest + (storage = embedded seekdb, embedder = local all-MiniLM-L6-v2) unless I say + otherwise. + **Before writing or patching .env, you MUST:** + a. Show me the current `.env` lines that will change (or note it is new). + b. Propose the exact new/changed values. + c. WAIT for my explicit "yes" before applying the write. + Copy .env.example if present, then fill + LLM_PROVIDER / LLM_API_KEY / LLM_MODEL. For a custom endpoint, the var is the + provider-prefixed *_LLM_BASE_URL (e.g. OPENAI_LLM_BASE_URL, QWEN_LLM_BASE_URL) — + verify the exact spelling against .env.example.full; a typo is silently ignored. + Never echo my key back in full. + +3a. 
SOURCE path (global install): + - pip install -e . (no-op if already installed editable from this checkout) + - Build the hook binaries FIRST — they get copied into Claude's plugin cache at + install time, so they must exist on disk before step "install": + if Go 1.22+ is present: make build-claude-hook + else tell me, and offer to install Go or fall back to the PIP path below. + - Ensure the plugin's root .mcp.json stays empty ({}) — default HTTP mode. + - STAGE the plugin into a stable, Claude-owned location so the marketplace does + NOT depend on this checkout — you can move or delete the repo afterwards and + memory keeps working. Copy the whole plugin dir (built binaries included) into + ~/.claude/marketplaces/powermem (run from the repo root): + DEST="$HOME/.claude/marketplaces/powermem" + mkdir -p "$DEST" + rsync -a --delete "$(pwd)/apps/claude-code-plugin/" "$DEST/" + # no rsync? rm -rf "$DEST" && mkdir -p "$DEST" && cp -a "$(pwd)/apps/claude-code-plugin/." "$DEST/" + The binaries from `make build-claude-hook` must already be on disk before this + copy. Re-copy on every re-run so the staged dir tracks your latest build. + - Register the marketplace from the STAGED dir (it ships + .claude-plugin/marketplace.json) — never from the repo: + claude plugin marketplace add "$DEST" + If it reports "already on disk", refresh it instead: + claude plugin marketplace update powermem + - Install + enable the plugin globally (user scope). Install auto-enables it: + claude plugin install memory-powermem@powermem --scope user + IMPORTANT idempotency rule: a plain re-install is a no-op and does NOT refresh + the cached copy. If the plugin is already installed AND you just rebuilt the + binaries or changed the plugin, force a refresh: + claude plugin uninstall memory-powermem@powermem + claude plugin install memory-powermem@powermem --scope user + (Enablement is preserved across uninstall+reinstall.) 
+ - Start the API server only if it is not already healthy (idempotent): + curl -s http://localhost:8848/api/v1/system/health # if not healthy: + powermem-server --host 0.0.0.0 --port 8848 & # run in background + - Confirm the plugin is enabled: claude plugin list (look for + memory-powermem@powermem). Do NOT print a --plugin-dir command — it is global + now; every `claude` and `claude -p` loads it automatically. + +3b. PIP path: + - Ensure uvx is available (offer to install uv if missing), then: + pip install powermem + - Register the MCP server globally so it persists across sessions (stdio = no + port), run from the directory holding the .env. Idempotent: if `claude mcp get + powermem` already exists, remove it first, then add: + claude mcp remove powermem 2>/dev/null; claude mcp add powermem -- uvx powermem-mcp stdio + +4. VERIFY with a real round-trip — do not claim success without data. Run the exact + commands below and substitute nothing except the noted placeholder. Do NOT mark + this step done until you have seen a non-empty search result actually come back. + + SOURCE/HTTP path — run a/b/c/d/e in order: + + a. Confirm the server answers (output must contain "status":"healthy"): + curl -s -m 5 http://localhost:8848/api/v1/system/health + + b. WRITE a probe memory. CRITICAL SCHEMA: the request body is a single "content" + STRING field — NOT a mem0-style "messages" array. Sending {"messages":[...]} + returns HTTP 422 `{"detail":[{"type":"missing","loc":["body","content"]...}]}`. + Use a unique user_id so the probe is isolated from real data: + curl -s -m 60 -X POST http://localhost:8848/api/v1/memories \ + -H 'Content-Type: application/json' \ + -d '{"content":"PowerMem setup probe: my favorite test fruit is dragonfruit-zx9.","user_id":"powermem_setup_probe"}' + Expected: JSON with "success": true and a data[0].memory_id (a long numeric + string). 
The call can take 10-30s because the LLM extracts facts — KEEP the + -m 60 timeout and WAIT; do not background it or abort early. Save the returned + data[0].memory_id (a.k.a. data[0].id) — you need it for cleanup in (e). + + c. SEARCH it back. CRITICAL SCHEMA: the body field is "query" (not "question" or + "text"), with the SAME user_id you wrote with: + curl -s -m 30 -X POST http://localhost:8848/api/v1/memories/search \ + -H 'Content-Type: application/json' \ + -d '{"query":"what is my favorite test fruit","user_id":"powermem_setup_probe","limit":5}' + Expected: data.total >= 1 and data.results[0].content mentions dragonfruit-zx9. + If data.total is 0 the round-trip FAILED — do NOT report success. Re-check the + server log and the embedder, retry the write in (b) once, then escalate to me. + + d. SHOW me both the write JSON and the search JSON (this is the proof of success). + + e. CLEAN UP the probe — delete by the id from (b), substituting it for {memory_id}, then confirm it is gone: + curl -s -m 10 -X DELETE http://localhost:8848/api/v1/memories/{memory_id} + Re-run the search from (c): data.total must now be 0. + + f. BONUS (proves global + headless wiring; do it if you can). Run a headless + prompt from an UNRELATED dir with NO --plugin-dir, then check the logs for + the two hook-driven calls it triggers: + ( cd /tmp && claude -p "Reply with exactly: probe ok" ) + Then in `server.log` (powermem-server) and `seekdb_data/log/seekdb.log` + (seekdb), AFTER that run, you MUST see both: + POST /api/v1/memories/search <- UserPromptSubmit hook (auto-recall) + POST /api/v1/memories <- SessionEnd hook (auto-save) + Seeing both proves PowerMem loads automatically in every `claude`/`claude -p`. + + PIP/MCP path: confirm `claude mcp list` shows powermem as "connected" (not + "failed"). If it shows failed, run `claude mcp get powermem` and verify the + configured command resolves on PATH. + +5. 
SUMMARIZE: path taken, where .env lives, where the staged marketplace lives + (~/.claude/marketplaces/powermem — independent of this repo), the server URL, + how memory is wired + (HTTP hooks vs MCP tools — recall is auto-injected on UserPromptSubmit, not a + tool the model calls; writes happen on SessionEnd/PostCompact), confirmation that + it is enabled globally, and the fact that I just run `claude` (or `claude -p`) + with nothing extra. Note: the background server does not survive a reboot — offer + to set up a systemd user service for autostart. + +## Re-running / refreshing later + +This file is safe to re-run end to end. The only manual-feeling case is refreshing +the cached plugin after you change the plugin or rebuild the Go hooks at the SAME +version: rebuild (`make build-claude-hook`), re-copy the result into the staged +marketplace (from the repo root: `rsync -a --delete "$(pwd)/apps/claude-code-plugin/" ~/.claude/marketplaces/powermem/`), +then force-refresh the cache with `claude plugin uninstall memory-powermem@powermem` +followed by `claude plugin install memory-powermem@powermem --scope user` (or bump the +version in .claude-plugin/plugin.json so `claude plugin update memory-powermem` picks it up). + +To turn it off without uninstalling: `claude plugin disable memory-powermem@powermem` +(re-enable with `claude plugin enable ...`). To disable only prompt-time search +injection, set POWERMEM_PROMPT_SEARCH=0. The hook talks to POWERMEM_BASE_URL +(default http://localhost:8848). + +For the full manual reference, see ../../docs/integrations/claude_code.md + + +## 🚨 COMPREHENSIVE ERROR HANDLING GUIDE + +Every real-world setup encounters issues. 
This guide documents specific error scenarios +and their resolutions discovered during actual setup attempts: + +### Log File Locations +- **powermem-server**: `server.log` (RotatingFileHandler, 10MB max, 5 backups) +- **seekdb**: `seekdb_data/log/seekdb.log` (native C++ engine log) + +### Error Resolution Checklist + +#### [E001] PEP 668 System Protection +**Problem**: `pip install` fails with "externally-managed-environment" +**Fix**: Use virtual environment +```bash +python3 -m venv venv +source venv/bin/activate +pip install -e . +``` + +#### [E002] Missing Server Dependencies +**Problem**: Server startup fails with missing packages +**Fix**: Install missing dependencies +```bash +pip install 'powermem[server]' +pip install 'pyobvector[pyseekdb]' +pip install loguru +``` + +#### [E003] SeekDB File Locking +**Problem**: "open seekdb failed OB_ERROR(4000)" or "db opened by other process" +**Fix**: Stop the process holding the lock and retry; delete `seekdb_data` only if the error persists, and only with the user's explicit confirmation +```bash +pkill -f powermem-server +rm -rf seekdb_data # DESTRUCTIVE: erases ALL stored memories — confirm with the user first +powermem-server --host 0.0.0.0 --port 8848 & +``` + +#### [E004] Missing Go for Hooks +**Problem**: `make build-claude-hook` fails +**Fix**: Install Go 1.22+ +```bash +brew install go # macOS (Homebrew); on Linux use your package manager or https://go.dev/dl/ +make build-claude-hook +``` + +#### [E005] Storage Backend Initialization +**Problem**: 503 errors on API calls despite server health +**Fix**: Use SQLite alternative +```bash +STORAGE_TYPE=sqlite SQLITE_DB_PATH=sqlite_data/powermem.db powermem-server --host 0.0.0.0 --port 8848 & +``` + +#### [E006] Model Download Timeout +**Problem**: Server hangs for 30-60s on startup, "timed out thrown while requesting HEAD" +**Fix**: The embedder now auto-detects cache and falls back with a 30s timeout. +If the model is not cached, download it manually: +```bash +python -c "from modelscope import snapshot_download; snapshot_download('AI-ModelScope/all-MiniLM-L6-v2')" +``` + +## PRE-CHECK & PREREQUISITES + +1. **Verify Go version**: `go version` (must be 1.22+) +2. **Check dependencies**: `python3 -m pip --version` +3. 
**Install uvx**: `curl -LsSf https://astral.sh/uv/install.sh | sh` + +## STEP BY STEP PROVEN PATH + +Given the encountered errors, here are the tested workarounds for each approach: + +### Method A: SOURCE Path (Current Directory Build) +```bash +# Create virtual environment to avoid PEP 668 issues +python3 -m venv venv +source venv/bin/activate + +# Install everything with extras +pip install -e . +pip install 'powermem[server]' +pip install 'pyobvector[pyseekdb]' +pip install loguru + +# Build and stage Claude hooks +make build-claude-hook + +# Register marketplace +DEST="$HOME/.claude/marketplaces/powermem" +mkdir -p "$DEST" +rsync -a --delete "$(pwd)/apps/claude-code-plugin/" "$DEST/" +claude plugin marketplace add "$DEST" +claude plugin install memory-powermem@powermem --scope user + +# Start server (logs go to server.log automatically) +powermem-server --host 0.0.0.0 --port 8848 & +``` + +### Method B: PIP Path (Recommended for Stability) +```bash +# Clean virtual environment approach +python3 -m venv venv +source venv/bin/activate +pip install powermem +pip install 'powermem[server]' +claude mcp remove powermem 2>/dev/null +claude mcp add powermem -- uvx powermem-mcp stdio +``` + +### Method C: Troubleshooting Installation +```bash +# Common troubleshooting commands +lsof -i :8848 # Check if port is in use +pkill -f powermem-server # Kill any running server +rm -rf seekdb_data # Reset SeekDB if corrupted + +# Check logs +tail -f server.log # PowerMem server errors +tail -f seekdb_data/log/seekdb.log # SeekDB engine errors +``` + +## FINAL VALIDATION STEPS + +After setup, verify the complete round-trip: + +```bash +# 1. Health check +curl -s http://localhost:8848/api/v1/system/health + +# 2. Write test memory +curl -s -X POST http://localhost:8848/api/v1/memories \ + -H 'Content-Type: application/json' \ + -d '{"content":"PowerMem setup verification complete","user_id":"setup_test"}' + +# 3. 
Search test +curl -s -X POST http://localhost:8848/api/v1/memories/search \ + -H 'Content-Type: application/json' \ + -d '{"query":"setup verification","user_id":"setup_test","limit":1}' +``` + +## SYSTEMD AUTOSTART (Optional) +```bash +mkdir -p ~/.config/systemd/user +cat > ~/.config/systemd/user/powermem.service << EOF +[Unit] +Description=PowerMem Memory Server +After=network.target + +[Service] +Type=simple +WorkingDirectory=$(pwd) +ExecStart=/bin/bash -c 'source venv/bin/activate && powermem-server --host 0.0.0.0 --port 8848' +Restart=always +RestartSec=5 + +[Install] +WantedBy=default.target +EOF + +systemctl --user daemon-reload +systemctl --user enable powermem.service +systemctl --user start powermem.service +``` + +## SUMMARY + +**Path taken**: **[Based on observed errors, recommend PIP approach for stability]** +- **.env location**: $(pwd)/.env +- **Virtual environment**: $(pwd)/venv +- **Plugin marketplace**: ~/.claude/marketplaces/powermem +- **Server URL**: http://localhost:8848 +- **Memory system**: Embedded seekdb storage by default (SQLite only if the [E005] fallback was used), wired through HTTP hooks +- **Global enablement**: Complete via `claude plugin install` +- **Usage**: Run `claude` command with no extra flags needed + +**Quick commands for daily use**: +```bash +# Start server +source venv/bin/activate +powermem-server --host 0.0.0.0 --port 8848 + +# Check status +systemctl --user status powermem.service + +# Find the running server process +ps aux | grep powermem +``` + +Your Claude Code is now configured with automatic memory recall and persistence globally, in every `claude` session on this machine. \ No newline at end of file diff --git a/apps/claude-code-plugin/UNINSTALL.md b/apps/claude-code-plugin/UNINSTALL.md new file mode 100644 index 00000000..62b491d5 --- /dev/null +++ b/apps/claude-code-plugin/UNINSTALL.md @@ -0,0 +1,91 @@ +# PowerMem — automated Claude Code teardown + +This file is a **prompt for Claude Code**. Open Claude Code in your terminal and say: + +> Read and follow `apps/claude-code-plugin/UNINSTALL.md` to remove PowerMem from Claude Code. 
+ +It reverses everything `SETUP.md` did: it unregisters the plugin/MCP server, removes the +staged marketplace copy (~/.claude/marketplaces/powermem), stops the PowerMem API server, +uninstalls the powermem package, and (with my confirmation) cleans up build artifacts and +stored data. + +--- + +Remove the PowerMem Claude Code integration from this machine. Do it autonomously. + +This procedure is **idempotent and re-runnable**: running it any number of times is safe, +including on a machine where PowerMem is only partially installed or already fully gone. +Treat every "not found / not installed / already removed / port already free" condition as +SUCCESS — append `|| true` (or equivalent) to each command so no such case aborts the run. +A clean machine must produce all-"already absent" results and still finish at exit 0. +Report what was actually removed vs. already absent. + +Do NOT delete my LLM API key, my .env, or my stored memories without explicit +confirmation — those steps are gated below. + +1. DETECT CONTEXT. The current directory is the PowerMem source tree if a + pyproject.toml here has name = "powermem" (or src/powermem/ and + apps/claude-code-plugin/ both exist). Tell me which path applies: + - SOURCE -> global plugin install (HTTP hooks) was used. + - PIP -> the powermem-mcp server (MCP) was used. + If unsure, check both: `claude plugin list` (look for memory-powermem@powermem) + and `claude mcp list` (look for powermem). If NEITHER is present, PowerMem is already + unregistered — say so, then still run the remaining steps (they will all be harmless + no-ops) and go straight to SUMMARIZE. + +2. STOP THE API SERVER (idempotent). Prefer the Makefile target in the source tree + (it already exits 0 when nothing is running): + make server-stop 2>/dev/null || true + If that target is unavailable or the server was started another way, fall back to a + port-based stop (default port 8848). 
The trailing `; true` keeps it green when the + port is already free: + PID=$(lsof -t -i:8848 2>/dev/null); [ -n "$PID" ] && { kill "$PID" 2>/dev/null; sleep 2; kill -9 "$PID" 2>/dev/null; }; true + Then confirm nothing answers (either branch is fine, never errors): + curl -s -m 3 http://localhost:8848/api/v1/system/health >/dev/null 2>&1 && echo "still up" || echo "server down" + Also remove a stale PID file if present: rm -f .server.pid 2>/dev/null || true + +3a. SOURCE path — remove the global plugin + marketplace (idempotent): + - Disable then uninstall the plugin (skip silently if not installed): + claude plugin disable memory-powermem@powermem 2>/dev/null || true + claude plugin uninstall memory-powermem@powermem 2>/dev/null || true + - Remove the marketplace registration (skip silently if not present): + claude plugin marketplace remove powermem 2>/dev/null || true + - Remove the staged marketplace copy created by SETUP's STAGE step (rm -rf never + errors when the dir is already gone). This is plugin build output, not user data: + rm -rf "$HOME/.claude/marketplaces/powermem" 2>/dev/null || true + - Verify it is gone: `claude plugin list` must not show memory-powermem, and + ~/.claude/settings.json enabledPlugins must not contain + "memory-powermem@powermem". If a stale enabledPlugins entry remains, remove + just that key (leave my other plugins untouched). + +3b. PIP path — remove the MCP server registration (idempotent): + claude mcp remove powermem 2>/dev/null || true + Verify `claude mcp list` no longer lists powermem. + +4. REMOVE THE PYTHON PACKAGE (idempotent). Uninstall the powermem package; this also + removes the powermem-server / powermem-mcp commands. Skip quietly if not installed: + pip uninstall -y powermem 2>/dev/null || true + Verify it is gone: `python -c "import powermem"` must fail, and `which powermem-server` + must return nothing. + +5. 
OPTIONAL CLEANUP — ask me before each of these; they are not required to disable + the integration, and some destroy data: + - Build artifacts (SOURCE): delete the compiled hook binaries (rm -rf never errors + when the dir is already gone): + rm -rf apps/claude-code-plugin/hooks/bin + (You may also restore the committed default if it drifted: + git checkout -- apps/claude-code-plugin/.mcp.json 2>/dev/null || true) + - Stored memories (DESTRUCTIVE — this erases all my saved memories): the embedded + seekdb data lives in `./seekdb_data/` (or the path in my .env). + For SQLite storage mode, data lives in `./sqlite_data/`. + Only delete it if I explicitly say so. + - Secrets: do NOT touch my .env unless I explicitly ask. If I do, redact the key + in any output. + +6. SUMMARIZE: which path applied, what was removed vs. already absent, confirmation + that the server is stopped and the plugin/MCP server is no longer registered, and + list anything left in place by design (e.g. .env, seekdb_data/, sqlite_data/, + the powermem package) so I know what — if anything — to clean up manually. + +For the install procedure, see SETUP.md. For the full manual reference, see +../../docs/integrations/claude_code.md diff --git a/apps/claude-code-plugin/cmd/powermem-hook/main.go b/apps/claude-code-plugin/cmd/powermem-hook/main.go index d03654f7..41dc1725 100644 --- a/apps/claude-code-plugin/cmd/powermem-hook/main.go +++ b/apps/claude-code-plugin/cmd/powermem-hook/main.go @@ -18,7 +18,7 @@ import ( ) // Default REST base when POWERMEM_BASE_URL is unset (matches .mcp.json local server). 
-const defaultPowerMemBaseURL = "http://localhost:8000" +const defaultPowerMemBaseURL = "http://localhost:8848" func main() { if len(os.Args) >= 2 { diff --git a/apps/claude-code-plugin/config/mcp-mode.mcp.json b/apps/claude-code-plugin/config/mcp-mode.mcp.json index c300597c..46269e26 100644 --- a/apps/claude-code-plugin/config/mcp-mode.mcp.json +++ b/apps/claude-code-plugin/config/mcp-mode.mcp.json @@ -2,7 +2,7 @@ "mcpServers": { "powermem": { "transport": "http", - "url": "http://localhost:8000/mcp" + "url": "http://localhost:8848/mcp" } } } diff --git a/apps/claude-code-plugin/hooks/hooks.json b/apps/claude-code-plugin/hooks/hooks.json index a66e6f5d..7b4ea7f2 100644 --- a/apps/claude-code-plugin/hooks/hooks.json +++ b/apps/claude-code-plugin/hooks/hooks.json @@ -1,5 +1,5 @@ { - "description": "Push Claude Code session transcripts (SessionEnd) and compact summaries (PostCompact) to PowerMem via HTTP. UserPromptSubmit: semantic search injects context by default; set POWERMEM_PROMPT_SEARCH=0 (or false/no/off) to disable. Uses native binaries under hooks/bin. macOS/Linux: sh launcher. Windows without sh: merge hooks/hooks.windows.example.json. POWERMEM_BASE_URL defaults to http://localhost:8000 if unset.", + "description": "Push Claude Code session transcripts (SessionEnd) and compact summaries (PostCompact) to PowerMem via HTTP. UserPromptSubmit: semantic search injects context by default; set POWERMEM_PROMPT_SEARCH=0 (or false/no/off) to disable. Uses native binaries under hooks/bin. macOS/Linux: sh launcher. Windows without sh: merge hooks/hooks.windows.example.json. 
POWERMEM_BASE_URL defaults to http://localhost:8848 if unset.", "hooks": { "UserPromptSubmit": [ { diff --git a/apps/claude-code-plugin/hooks/hooks.windows.example.json b/apps/claude-code-plugin/hooks/hooks.windows.example.json index 4d5d4f44..03ed28df 100644 --- a/apps/claude-code-plugin/hooks/hooks.windows.example.json +++ b/apps/claude-code-plugin/hooks/hooks.windows.example.json @@ -1,5 +1,5 @@ { - "description": "Windows: use this hook command shape if `sh` is not available. Merge into ~/.claude/settings.json or project .claude/settings.json under the same hook events. POWERMEM_BASE_URL defaults to http://localhost:8000 if unset.", + "description": "Windows: use this hook command shape if `sh` is not available. Merge into ~/.claude/settings.json or project .claude/settings.json under the same hook events. POWERMEM_BASE_URL defaults to http://localhost:8848 if unset.", "hooks": { "UserPromptSubmit": [ { diff --git a/apps/claude-code-plugin/watcher/README.md b/apps/claude-code-plugin/watcher/README.md index 597f80f1..b215345f 100644 --- a/apps/claude-code-plugin/watcher/README.md +++ b/apps/claude-code-plugin/watcher/README.md @@ -2,7 +2,7 @@ The poller lives in the same **native binary** as the Claude hooks (no Python). -From the plugin root. `POWERMEM_BASE_URL` defaults to `http://localhost:8000` if unset (optional `POWERMEM_API_KEY`): +From the plugin root. `POWERMEM_BASE_URL` defaults to `http://localhost:8848` if unset (optional `POWERMEM_API_KEY`): ```bash sh hooks/run-hook.sh poll diff --git a/apps/vscode-extension/README.md b/apps/vscode-extension/README.md index 80beef5f..2a5e5c16 100644 --- a/apps/vscode-extension/README.md +++ b/apps/vscode-extension/README.md @@ -13,15 +13,15 @@ Give Cursor, Claude Code, Codex, Windsurf, and Copilot access to [PowerMem](http ## Requirements - A running **PowerMem** backend: - - **HTTP API + MCP**: `powermem-server --host 0.0.0.0 --port 8000` (default), or - - **MCP only**: e.g. 
`uvx powermem-mcp sse` (port 8000) or `uvx powermem-mcp stdio`. + - **HTTP API + MCP**: `powermem-server --host 0.0.0.0 --port 8848` (default), or + - **MCP only**: e.g. `uvx powermem-mcp sse` (port 8848) or `uvx powermem-mcp stdio`. - PowerMem is configured (e.g. `.env` next to the server or in project root). ## Quick Start 1. Install this extension in VS Code or Cursor. 2. Start your PowerMem backend (see above). -3. Click the **PowerMem** status bar item; if disconnected, run **Setup** and set **Backend URL** (e.g. `http://localhost:8000`). +3. Click the **PowerMem** status bar item; if disconnected, run **Setup** and set **Backend URL** (e.g. `http://localhost:8848`). 4. Once connected, choose **Link to AI tools** to write configs for Cursor, Claude, Codex, Windsurf, and Copilot. 5. Use **Query memories** or **Add selection to memory** from the command palette or status bar menu. @@ -30,7 +30,7 @@ Give Cursor, Claude Code, Codex, Windsurf, and Copilot access to [PowerMem](http | Setting | Description | Default | |--------|-------------|---------| | `powermem.enabled` | Enable the extension | `true` | -| `powermem.backendUrl` | PowerMem backend URL | `http://localhost:8000` | +| `powermem.backendUrl` | PowerMem backend URL | `http://localhost:8848` | | `powermem.apiKey` | API key (X-API-Key) if required | (empty) | | `powermem.useMCP` | Write MCP config for AI tools; if false, write HTTP where supported | `true` | | `powermem.mcpServerPath` | Optional path/command for local MCP (e.g. `uvx`); empty = use backendUrl/mcp | (empty) | diff --git a/apps/vscode-extension/package.json b/apps/vscode-extension/package.json index 94267597..621e08f4 100644 --- a/apps/vscode-extension/package.json +++ b/apps/vscode-extension/package.json @@ -64,7 +64,7 @@ }, "powermem.backendUrl": { "type": "string", - "default": "http://localhost:8000", + "default": "http://localhost:8848", "description": "PowerMem server address. HTTP mode: used as the API base URL. 
MCP mode: used as the MCP root (e.g. {backendUrl}/mcp if MCP Server Path is empty)." }, "powermem.apiKey": { @@ -109,7 +109,7 @@ }, "powermem.autoCapture.maxChars": { "type": "number", - "default": 8000, + "default": 8000, "description": "Max characters per file to add to memory on auto-capture (avoids huge payloads)." }, "powermem.chat.autoSummarizeEveryNTurns": { diff --git a/apps/vscode-extension/src/api/client.ts b/apps/vscode-extension/src/api/client.ts index af30a00f..e2a5b0d9 100644 --- a/apps/vscode-extension/src/api/client.ts +++ b/apps/vscode-extension/src/api/client.ts @@ -1,6 +1,6 @@ /** * PowerMem HTTP API client for extension commands (search, add memory). - * Base URL e.g. http://localhost:8000; endpoints: /api/v1/memories/search, /api/v1/memories + * Base URL e.g. http://localhost:8848; endpoints: /api/v1/memories/search, /api/v1/memories */ import type { diff --git a/apps/vscode-extension/src/extension.ts b/apps/vscode-extension/src/extension.ts index b84f0654..a870e23e 100644 --- a/apps/vscode-extension/src/extension.ts +++ b/apps/vscode-extension/src/extension.ts @@ -10,7 +10,7 @@ import { searchMemories, addMemory } from './api/client'; import type { SearchResultItem } from './api/types'; import { registerChatParticipant } from './chat/participant'; -let backendUrl = 'http://localhost:8000'; +let backendUrl = 'http://localhost:8848'; let apiKey: string | undefined; let statusBar: vscode.StatusBarItem; let useMCP = true; @@ -182,7 +182,7 @@ async function showSetup(): Promise { if (!choice) return; switch (choice.action) { case 'url': { - const url = await vscode.window.showInputBox({ prompt: 'PowerMem backend URL', value: backendUrl, placeHolder: 'http://localhost:8000' }); + const url = await vscode.window.showInputBox({ prompt: 'PowerMem backend URL', value: backendUrl, placeHolder: 'http://localhost:8848' }); if (url) { await config.update('backendUrl', url, vscode.ConfigurationTarget.Global); backendUrl = url; @@ -234,7 +234,7 @@ async 
function showSetup(): Promise { export function activate(context: vscode.ExtensionContext): void { const config = vscode.workspace.getConfiguration('powermem'); isEnabled = config.get('enabled') ?? true; - backendUrl = config.get('backendUrl') || 'http://localhost:8000'; + backendUrl = config.get('backendUrl') || 'http://localhost:8848'; apiKey = config.get('apiKey') || undefined; useMCP = getUseMCPFromConfig(config); mcpServerPath = config.get('mcpServerPath') || ''; @@ -348,7 +348,7 @@ export function activate(context: vscode.ExtensionContext): void { vscode.workspace.onDidChangeConfiguration((e) => { if (!e.affectsConfiguration('powermem')) return; const c = vscode.workspace.getConfiguration('powermem'); - backendUrl = c.get('backendUrl') || 'http://localhost:8000'; + backendUrl = c.get('backendUrl') || 'http://localhost:8848'; apiKey = c.get('apiKey') || undefined; useMCP = getUseMCPFromConfig(c); mcpServerPath = c.get('mcpServerPath') || ''; diff --git a/docker/DOCKER.md b/docker/DOCKER.md index 792da63e..7117a58a 100644 --- a/docker/DOCKER.md +++ b/docker/DOCKER.md @@ -31,13 +31,13 @@ docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile . # This allows both SDK and Server to use the same configuration docker run -d \ --name powermem-server \ - -p 8000:8000 \ + -p 8848:8848 \ -v $(pwd)/.env:/app/.env:ro \ --env-file .env \ oceanbase/powermem-server:latest ``` -The server will be available at `http://localhost:8000`. +The server will be available at `http://localhost:8848`. **Note**: If you have a `.env` file that's shared between the SDK and Server, use the first command with volume mount (`-v`) to ensure both components read from the same configuration file. See [Shared .env File](#shared-env-file-for-sdk-and-server) for more details. @@ -123,7 +123,7 @@ Currently, the Dockerfile uses a multi-stage build to optimize image size. 
The b ```bash docker run -d \ --name powermem-server \ - -p 8000:8000 \ + -p 8848:8848 \ oceanbase/powermem-server:latest ``` @@ -132,9 +132,9 @@ docker run -d \ ```bash docker run -d \ --name powermem-server \ - -p 8000:8000 \ + -p 8848:8848 \ -e POWERMEM_SERVER_HOST=0.0.0.0 \ - -e POWERMEM_SERVER_PORT=8000 \ + -e POWERMEM_SERVER_PORT=8848 \ -e POWERMEM_SERVER_WORKERS=4 \ -e POWERMEM_SERVER_API_KEYS=key1,key2,key3 \ -e POWERMEM_SERVER_AUTH_ENABLED=true \ @@ -148,7 +148,7 @@ Create a `.env` file: ```env POWERMEM_SERVER_HOST=0.0.0.0 -POWERMEM_SERVER_PORT=8000 +POWERMEM_SERVER_PORT=8848 POWERMEM_SERVER_WORKERS=4 POWERMEM_SERVER_API_KEYS=your-api-key-1,your-api-key-2 POWERMEM_SERVER_AUTH_ENABLED=true @@ -162,7 +162,7 @@ Run with the environment file: ```bash docker run -d \ --name powermem-server \ - -p 8000:8000 \ + -p 8848:8848 \ --env-file .env \ oceanbase/powermem-server:latest ``` @@ -174,7 +174,7 @@ When both the SDK and Server need to use the same `.env` file, you can mount it ```bash docker run -d \ --name powermem-server \ - -p 8000:8000 \ + -p 8848:8848 \ -v $(pwd)/.env:/app/.env:ro \ --env-file .env \ oceanbase/powermem-server:latest @@ -194,7 +194,7 @@ If you need to mount volumes for logs or configuration: ```bash docker run -d \ --name powermem-server \ - -p 8000:8000 \ + -p 8848:8848 \ -v ./logs:/app/logs \ -v ./config:/app/config \ --env-file .env \ @@ -219,7 +219,7 @@ Mount the `.env` file as a read-only volume so the Server can read it directly: ```bash docker run -d \ --name powermem-server \ - -p 8000:8000 \ + -p 8848:8848 \ -v $(pwd)/.env:/app/.env:ro \ --env-file .env \ oceanbase/powermem-server:latest @@ -237,7 +237,7 @@ If you prefer not to mount the file, you can use `--env-file` to load environmen ```bash docker run -d \ --name powermem-server \ - -p 8000:8000 \ + -p 8848:8848 \ --env-file .env \ oceanbase/powermem-server:latest ``` @@ -254,7 +254,7 @@ The `.env` file contains configuration for both the PowerMem SDK and Server. 
The | Variable | Default | Description | |----------|---------|-------------| | `POWERMEM_SERVER_HOST` | `0.0.0.0` | Host to bind the server to | -| `POWERMEM_SERVER_PORT` | `8000` | Port to bind the server to | +| `POWERMEM_SERVER_PORT` | `8848` | Port to bind the server to | | `POWERMEM_SERVER_WORKERS` | `4` | Number of worker processes | | `POWERMEM_SERVER_RELOAD` | `false` | Enable auto-reload (development only) | @@ -321,10 +321,10 @@ services: dockerfile: docker/Dockerfile container_name: powermem-server ports: - - "8000:8000" + - "8848:8848" environment: - POWERMEM_SERVER_HOST=0.0.0.0 - - POWERMEM_SERVER_PORT=8000 + - POWERMEM_SERVER_PORT=8848 - POWERMEM_SERVER_WORKERS=4 - POWERMEM_SERVER_API_KEYS=${POWERMEM_SERVER_API_KEYS:-} - POWERMEM_SERVER_AUTH_ENABLED=${POWERMEM_SERVER_AUTH_ENABLED:-true} @@ -337,7 +337,7 @@ services: - ./logs:/app/logs restart: unless-stopped healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/system/health"] + test: ["CMD", "curl", "-f", "http://localhost:8848/api/v1/system/health"] interval: 30s timeout: 10s retries: 3 @@ -377,7 +377,7 @@ docker run -d \ --name powermem-server \ --memory="2g" \ --cpus="2" \ - -p 8000:8000 \ + -p 8848:8848 \ oceanbase/powermem-server:latest ``` @@ -402,7 +402,7 @@ services: container_name: powermem-server restart: always ports: - - "8000:8000" + - "8848:8848" environment: - POWERMEM_SERVER_WORKERS=8 - POWERMEM_SERVER_LOG_LEVEL=INFO @@ -417,7 +417,7 @@ services: cpus: '2' memory: 2G healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/system/health"] + test: ["CMD", "curl", "-f", "http://localhost:8848/api/v1/system/health"] interval: 30s timeout: 10s retries: 3 @@ -436,7 +436,7 @@ For production, it's recommended to use a reverse proxy (nginx, traefik, etc.): ```nginx # nginx.conf example upstream powermem { - server powermem-server:8000; + server powermem-server:8848; } server { @@ -485,7 +485,7 @@ docker exec powermem-server python -c "import psycopg; 
psycopg.connect('${POWERM 1. **Check if server is running**: ```bash -docker exec powermem-server curl -f http://localhost:8000/api/v1/system/health +docker exec powermem-server curl -f http://localhost:8848/api/v1/system/health ``` 2. **Check server logs**: @@ -495,13 +495,13 @@ docker logs powermem-server --tail 50 ### Port Already in Use -If port 8000 is already in use, change the port: +If port 8848 is already in use, change the port: ```bash docker run -d \ --name powermem-server \ - -p 8001:8000 \ - -e POWERMEM_SERVER_PORT=8000 \ + -p 8001:8848 \ + -e POWERMEM_SERVER_PORT=8848 \ oceanbase/powermem-server:latest ``` @@ -541,7 +541,7 @@ chmod 644 .env ```bash docker run -d \ --name powermem-server \ - -p 8000:8000 \ + -p 8848:8848 \ --env-file .env \ oceanbase/powermem-server:latest ``` diff --git a/docker/Dockerfile b/docker/Dockerfile index 020a96fc..8c96c727 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -98,20 +98,20 @@ RUN chmod +x /usr/local/bin/docker-entrypoint.sh USER powermem # Expose default port -EXPOSE 8000 +EXPOSE 8848 # Set environment variables ENV PYTHONUNBUFFERED=1 \ PYTHONDONTWRITEBYTECODE=1 \ POWERMEM_SERVER_HOST=0.0.0.0 \ - POWERMEM_SERVER_PORT=8000 + POWERMEM_SERVER_PORT=8848 # Health check HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \ - CMD curl -f http://localhost:8000/api/v1/system/health || exit 1 + CMD curl -f http://localhost:8848/api/v1/system/health || exit 1 # Use entrypoint script ENTRYPOINT ["docker-entrypoint.sh"] -# Default command: API server + PMem Dashboard on port 8000 -CMD ["powermem-server", "--host", "0.0.0.0", "--port", "8000"] +# Default command: API server + PMem Dashboard on port 8848 +CMD ["powermem-server", "--host", "0.0.0.0", "--port", "8848"] diff --git a/docker/README.md b/docker/README.md index 087e4bd6..96024bdb 100644 --- a/docker/README.md +++ b/docker/README.md @@ -43,7 +43,7 @@ From the project root directory: ```bash docker run -d \ --name powermem-server \ - -p 
8000:8000 \ + -p 8848:8848 \ -v $(pwd)/.env:/app/.env:ro \ --env-file .env \ oceanbase/powermem-server:latest @@ -52,8 +52,8 @@ docker run -d \ ## Services ### PowerMem Server -- Port: 8000 -- Health check: `http://localhost:8000/api/v1/system/health` +- Port: 8848 +- Health check: `http://localhost:8848/api/v1/system/health` - Database: Connected to seekdb without password ### seekdb Database @@ -89,7 +89,7 @@ The `docker-compose.yml` file includes default configuration values: **PowerMem Server:** - Host: `0.0.0.0` -- Port: `8000` +- Port: `8848` - Workers: `4` - Authentication: Disabled - CORS: Enabled for all origins diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 67c6a690..5f9337af 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -13,10 +13,10 @@ services: image: oceanbase/powermem-server:latest container_name: powermem-server ports: - - "${POWERMEM_SERVER_PORT:-8000}:8000" + - "${POWERMEM_SERVER_PORT:-8848}:8848" environment: - POWERMEM_SERVER_HOST=${POWERMEM_SERVER_HOST:-0.0.0.0} - - POWERMEM_SERVER_PORT=${POWERMEM_SERVER_PORT:-8000} + - POWERMEM_SERVER_PORT=${POWERMEM_SERVER_PORT:-8848} - POWERMEM_SERVER_WORKERS=${POWERMEM_SERVER_WORKERS:-4} - POWERMEM_SERVER_API_KEYS=${POWERMEM_SERVER_API_KEYS:-} - POWERMEM_SERVER_AUTH_ENABLED=${POWERMEM_SERVER_AUTH_ENABLED:-false} @@ -42,7 +42,7 @@ services: - ./logs:/app/logs restart: unless-stopped healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/system/health"] + test: ["CMD", "curl", "-f", "http://localhost:8848/api/v1/system/health"] interval: 30s timeout: 10s retries: 3 diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index eb13df7d..c11ecfc8 100644 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -41,7 +41,7 @@ main() { # Log configuration (without sensitive data) log "Server Configuration:" log " Host: ${POWERMEM_SERVER_HOST:-0.0.0.0}" - log " Port: ${POWERMEM_SERVER_PORT:-8000}" + log " Port: 
${POWERMEM_SERVER_PORT:-8848}" log " Workers: ${POWERMEM_SERVER_WORKERS:-4}" log " Log Level: ${POWERMEM_SERVER_LOG_LEVEL:-INFO}" log " Auth Enabled: ${POWERMEM_SERVER_AUTH_ENABLED:-true}" diff --git a/docs/api/0002-async_memory.md b/docs/api/0002-async_memory.md index 1e4bbc67..ad36d949 100644 --- a/docs/api/0002-async_memory.md +++ b/docs/api/0002-async_memory.md @@ -263,18 +263,18 @@ async def batch_process(): asyncio.run(batch_process()) ``` -### Limitation: Embedded SeekDB Does Not Support Async +### Limitation: Embedded seekdb Does Not Support Async -Embedded SeekDB (local file mode with no `host` configured) uses a single-threaded C++ engine that **does not support concurrent multi-threaded access**. `AsyncMemory` internally submits synchronous operations to a `ThreadPoolExecutor`, which causes multiple threads to read and write the same embedded SeekDB instance simultaneously. This leads to C++-level crashes such as `pure virtual method called` or `Segmentation fault`. +Embedded seekdb (local file mode with no `host` configured) uses a single-threaded C++ engine that **does not support concurrent multi-threaded access**. `AsyncMemory` internally submits synchronous operations to a `ThreadPoolExecutor`, which causes multiple threads to read and write the same embedded seekdb instance simultaneously. This leads to C++-level crashes such as `pure virtual method called` or `Segmentation fault`. -**`AsyncMemory` cannot be used with embedded SeekDB.** Use the synchronous `Memory` class instead. +**`AsyncMemory` cannot be used with embedded seekdb.** Use the synchronous `Memory` class instead. 
```python -# ❌ Not supported with embedded SeekDB +# ❌ Not supported with embedded seekdb from powermem import AsyncMemory async_memory = AsyncMemory(config=embedded_seekdb_config) # crashes -# ✓ Use the synchronous interface with embedded SeekDB +# ✓ Use the synchronous interface with embedded seekdb from powermem import Memory memory = Memory(config=embedded_seekdb_config) ``` @@ -294,5 +294,5 @@ Use `Memory` when: - Simple synchronous scripts - Interactive notebooks - Simple use cases without concurrency needs -- Using **embedded SeekDB** (local file mode, no `host`) +- Using **embedded seekdb** (local file mode, no `host`) diff --git a/docs/api/0004-mcp.md b/docs/api/0004-mcp.md index cceba55e..5dc26fdb 100644 --- a/docs/api/0004-mcp.md +++ b/docs/api/0004-mcp.md @@ -47,10 +47,10 @@ For PowerMem installation and configuration, please refer to: You can start PowerMem MCP with different protocols using the following commands: ```shell -uvx powermem-mcp sse # sse mode, default port 8000 (recommended) +uvx powermem-mcp sse # sse mode, default port 8848 (recommended) uvx powermem-mcp stdio # stdio mode uvx powermem-mcp sse 8001 # sse mode, specify port 8001 -uvx powermem-mcp streamable-http # streamable-http mode, default port 8000 +uvx powermem-mcp streamable-http # streamable-http mode, default port 8848 uvx powermem-mcp streamable-http 8001 # streamable-http mode, specify port 8001 ``` @@ -64,7 +64,7 @@ Claude Desktop config example: { "mcpServers": { "powermem": { - "url": "http://{host}:8000/mcp" + "url": "http://{host}:8848/mcp" } } } diff --git a/docs/api/0005-api_server.md b/docs/api/0005-api_server.md index 535dea7e..e554c6b4 100644 --- a/docs/api/0005-api_server.md +++ b/docs/api/0005-api_server.md @@ -18,14 +18,14 @@ The PowerMem HTTP API Server is built with FastAPI and provides: ```bash # Method 1: Install from powermem package, use CLI command pip install powermem -powermem-server --host 0.0.0.0 --port 8000 +powermem-server --host 0.0.0.0 --port 8848 # 
Method 2: Using Docker # Build and run with Docker docker build -t oceanbase/powermem-server:latest -f docker/Dockerfile . docker run -d \ --name powermem-server \ - -p 8000:8000 \ + -p 8848:8848 \ -v $(pwd)/.env:/app/.env:ro \ --env-file .env \ oceanbase/powermem-server:latest @@ -89,7 +89,7 @@ The PowerMem SDK configuration is the same as the previous v0.2.0 version, with POWERMEM_SERVER_HOST=0.0.0.0 # Server port number -POWERMEM_SERVER_PORT=8000 +POWERMEM_SERVER_PORT=8848 # Number of worker processes (only used when reload=false) POWERMEM_SERVER_WORKERS=4 @@ -156,12 +156,12 @@ You can use the following tools to interact with the API: + **curl**: Command-line tool + **Postman**: GUI tool -+ **Swagger UI**: Access via browser at `http://0.0.0.0:8000/docs` ++ **Swagger UI**: Access via browser at `http://localhost:8848/docs` ### Base URL ```plain -Base URL: http://0.0.0.0:8000 -API Base: http://0.0.0.0:8000/api/v1 +Base URL: http://localhost:8848 +API Base: http://localhost:8848/api/v1 ``` --- @@ -197,7 +197,7 @@ X-API-Key: test-api-key-123 **Request Example**: ```bash -curl -X GET "http://localhost:8000/api/v1/system/health" +curl -X GET "http://localhost:8848/api/v1/system/health" ``` **Response Example**: @@ -232,9 +232,9 @@ curl -X GET "http://localhost:8000/api/v1/system/health" **Request Example**: ```bash -curl -X GET "http://localhost:8000/api/v1/system/status" -i +curl -X GET "http://localhost:8848/api/v1/system/status" -i -curl -X GET "http://localhost:8000/api/v1/system/status" \ +curl -X GET "http://localhost:8848/api/v1/system/status" \ -H "X-API-Key: test-api-key-123" -i ``` @@ -274,7 +274,7 @@ curl -X GET "http://localhost:8000/api/v1/system/status" \ **Request Example**: ```bash -curl -X GET "http://localhost:8000/api/v1/system/metrics" \ +curl -X GET "http://localhost:8848/api/v1/system/metrics" \ -H "X-API-Key: test-api-key-123" ``` @@ -318,15 +318,15 @@ powermem_api_request_duration_seconds_count{method="GET",endpoint="/api/v1/syste ```bash # Delete
all memories (system level) -curl -X DELETE "http://localhost:8000/api/v1/system/delete-all-memories" \ +curl -X DELETE "http://localhost:8848/api/v1/system/delete-all-memories" \ -H "X-API-Key: test-api-key-123" # Delete all memories for a specific agent -curl -X DELETE "http://localhost:8000/api/v1/system/delete-all-memories?agent_id=agent-456" \ +curl -X DELETE "http://localhost:8848/api/v1/system/delete-all-memories?agent_id=agent-456" \ -H "X-API-Key: test-api-key-123" # Delete all memories for a specific user -curl -X DELETE "http://localhost:8000/api/v1/system/delete-all-memories?user_id=user-123" \ +curl -X DELETE "http://localhost:8848/api/v1/system/delete-all-memories?user_id=user-123" \ -H "X-API-Key: test-api-key-123" ``` @@ -357,7 +357,7 @@ curl -X DELETE "http://localhost:8000/api/v1/system/delete-all-memories?user_id= **Request Example**: ```bash -curl -X POST "http://localhost:8000/api/v1/memories" \ +curl -X POST "http://localhost:8848/api/v1/memories" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -432,7 +432,7 @@ curl -X POST "http://localhost:8000/api/v1/memories" \ **Request Example**: ```bash -curl -X POST "http://localhost:8000/api/v1/memories/batch" \ +curl -X POST "http://localhost:8848/api/v1/memories/batch" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -530,27 +530,27 @@ curl -X POST "http://localhost:8000/api/v1/memories/batch" \ ```bash # Basic query -curl -X GET "http://localhost:8000/api/v1/memories?limit=10&offset=0" \ +curl -X GET "http://localhost:8848/api/v1/memories?limit=10&offset=0" \ -H "X-API-Key: test-api-key-123" # Filter by user -curl -X GET "http://localhost:8000/api/v1/memories?user_id=user-123&limit=20&offset=0" \ +curl -X GET "http://localhost:8848/api/v1/memories?user_id=user-123&limit=20&offset=0" \ -H "X-API-Key: test-api-key-123" # Filter by agent -curl -X GET "http://localhost:8000/api/v1/memories?agent_id=agent-456&limit=50&offset=0" \ 
+curl -X GET "http://localhost:8848/api/v1/memories?agent_id=agent-456&limit=50&offset=0" \ -H "X-API-Key: test-api-key-123" # Sort by updated_at (descending - most recent first) -curl -X GET "http://localhost:8000/api/v1/memories?user_id=user-123&limit=10&sort_by=updated_at&order=desc" \ +curl -X GET "http://localhost:8848/api/v1/memories?user_id=user-123&limit=10&sort_by=updated_at&order=desc" \ -H "X-API-Key: test-api-key-123" # Sort by created_at (ascending - oldest first) -curl -X GET "http://localhost:8000/api/v1/memories?user_id=user-123&limit=10&sort_by=created_at&order=asc" \ +curl -X GET "http://localhost:8848/api/v1/memories?user_id=user-123&limit=10&sort_by=created_at&order=asc" \ -H "X-API-Key: test-api-key-123" # Combined: filter, pagination, and sorting -curl -X GET "http://localhost:8000/api/v1/memories?user_id=user-123&agent_id=agent-456&limit=20&offset=0&sort_by=updated_at&order=desc" \ +curl -X GET "http://localhost:8848/api/v1/memories?user_id=user-123&agent_id=agent-456&limit=20&offset=0&sort_by=updated_at&order=desc" \ -H "X-API-Key: test-api-key-123" ``` @@ -625,11 +625,11 @@ curl -X GET "http://localhost:8000/api/v1/memories?user_id=user-123&agent_id=age ```bash # First, list all memories to see available IDs -curl -X GET "http://localhost:8000/api/v1/memories?user_id=user-123&agent_id=agent-456" \ +curl -X GET "http://localhost:8848/api/v1/memories?user_id=user-123&agent_id=agent-456" \ -H "X-API-Key: test-api-key-123" # Then query by specific ID -curl -X GET "http://localhost:8000/api/v1/memories/1?user_id=user-123&agent_id=agent-456" \ +curl -X GET "http://localhost:8848/api/v1/memories/1?user_id=user-123&agent_id=agent-456" \ -H "X-API-Key: test-api-key-123" ``` @@ -696,11 +696,11 @@ curl -X GET "http://localhost:8000/api/v1/memories/1?user_id=user-123&agent_id=a ```bash # First, list all memories to see available IDs -curl -X GET "http://localhost:8000/api/v1/memories?user_id=user-123&agent_id=agent-456" \ +curl -X GET 
"http://localhost:8848/api/v1/memories?user_id=user-123&agent_id=agent-456" \ -H "X-API-Key: test-api-key-123" # Update content -curl -X PUT "http://localhost:8000/api/v1/memories/658958031962243072" \ +curl -X PUT "http://localhost:8848/api/v1/memories/658958031962243072" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -710,7 +710,7 @@ curl -X PUT "http://localhost:8000/api/v1/memories/658958031962243072" \ }' # Update metadata -curl -X PUT "http://localhost:8000/api/v1/memories/658958031962243072" \ +curl -X PUT "http://localhost:8848/api/v1/memories/658958031962243072" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -724,7 +724,7 @@ curl -X PUT "http://localhost:8000/api/v1/memories/658958031962243072" \ }' # Update both content and metadata -curl -X PUT "http://localhost:8000/api/v1/memories/658958031962243072" \ +curl -X PUT "http://localhost:8848/api/v1/memories/658958031962243072" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -813,7 +813,7 @@ curl -X PUT "http://localhost:8000/api/v1/memories/658958031962243072" \ **Request Example**: ```bash -curl -X PUT "http://localhost:8000/api/v1/memories/batch" \ +curl -X PUT "http://localhost:8848/api/v1/memories/batch" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -957,7 +957,7 @@ curl -X PUT "http://localhost:8000/api/v1/memories/batch" \ **Request Example**: ```bash -curl -X DELETE "http://localhost:8000/api/v1/memories/658958021480677376?user_id=user-123&agent_id=agent-456" \ +curl -X DELETE "http://localhost:8848/api/v1/memories/658958021480677376?user_id=user-123&agent_id=agent-456" \ -H "X-API-Key: test-api-key-123" ``` @@ -994,7 +994,7 @@ curl -X DELETE "http://localhost:8000/api/v1/memories/658958021480677376?user_id **Request Example**: ```bash -curl -X DELETE "http://localhost:8000/api/v1/memories/batch" \ +curl -X DELETE 
"http://localhost:8848/api/v1/memories/batch" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -1049,7 +1049,7 @@ curl -X DELETE "http://localhost:8000/api/v1/memories/batch" \ ```bash # First, create some data -curl -X POST "http://localhost:8000/api/v1/memories" -H "X-API-Key: test-api-key-123" -H "Content-Type: application/json" -d '{ +curl -X POST "http://localhost:8848/api/v1/memories" -H "X-API-Key: test-api-key-123" -H "Content-Type: application/json" -d '{ "content": "User likes coffee and goes to Starbucks every morning", "user_id": "user-123", "agent_id": "agent-456", @@ -1068,7 +1068,7 @@ curl -X POST "http://localhost:8000/api/v1/memories" -H "X-API-Key: test-api-k }' # Search -curl -X POST "http://localhost:8000/api/v1/memories/search" \ +curl -X POST "http://localhost:8848/api/v1/memories/search" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -1139,7 +1139,7 @@ curl -X POST "http://localhost:8000/api/v1/memories/search" \ **Request Example**: ```bash -curl -X POST "http://localhost:8000/api/v1/memories/search" \ +curl -X POST "http://localhost:8848/api/v1/memories/search" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -1226,7 +1226,7 @@ curl -X POST "http://localhost:8000/api/v1/memories/search" \ ```bash # Add messages and extract profile (default: only extract from user messages) -curl -X POST "http://localhost:8000/api/v1/users/user-123/profile" \ +curl -X POST "http://localhost:8848/api/v1/users/user-123/profile" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -1243,7 +1243,7 @@ curl -X POST "http://localhost:8000/api/v1/users/user-123/profile" \ }' # Extract structured topics -curl -X POST "http://localhost:8000/api/v1/users/user-123/profile" \ +curl -X POST "http://localhost:8848/api/v1/users/user-123/profile" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -1256,7 
+1256,7 @@ curl -X POST "http://localhost:8000/api/v1/users/user-123/profile" \ }' # Include all messages (disable role filtering) -curl -X POST "http://localhost:8000/api/v1/users/user-123/profile" \ +curl -X POST "http://localhost:8848/api/v1/users/user-123/profile" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -1335,7 +1335,7 @@ curl -X POST "http://localhost:8000/api/v1/users/user-123/profile" \ **Request Example**: ```bash -curl -X PUT "http://localhost:8000/api/v1/users/user-123/memories/658954684471443456" \ +curl -X PUT "http://localhost:8848/api/v1/users/user-123/memories/658954684471443456" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -1390,7 +1390,7 @@ curl -X PUT "http://localhost:8000/api/v1/users/user-123/memories/65895468447144 **Request Example**: ```bash -curl -X GET "http://localhost:8000/api/v1/users/user-123/profile" \ +curl -X GET "http://localhost:8848/api/v1/users/user-123/profile" \ -H "X-API-Key: test-api-key-123" ``` @@ -1441,7 +1441,7 @@ curl -X GET "http://localhost:8000/api/v1/users/user-123/profile" \ **Request Example**: ```bash -curl -X DELETE "http://localhost:8000/api/v1/users/user-123/profile" \ +curl -X DELETE "http://localhost:8848/api/v1/users/user-123/profile" \ -H "X-API-Key: test-api-key-123" ``` @@ -1479,7 +1479,7 @@ curl -X DELETE "http://localhost:8000/api/v1/users/user-123/profile" \ **Request Example**: ```bash -curl -X GET "http://localhost:8000/api/v1/users/user-123/memories?limit=20&offset=0" \ +curl -X GET "http://localhost:8848/api/v1/users/user-123/memories?limit=20&offset=0" \ -H "X-API-Key: test-api-key-123" ``` @@ -1566,7 +1566,7 @@ curl -X GET "http://localhost:8000/api/v1/users/user-123/memories?limit=20&offse **Request Example**: ```bash -curl -X DELETE "http://localhost:8000/api/v1/users/user-123/memories" \ +curl -X DELETE "http://localhost:8848/api/v1/users/user-123/memories" \ -H "X-API-Key: test-api-key-123" ``` @@ -1607,7 
+1607,7 @@ curl -X DELETE "http://localhost:8000/api/v1/users/user-123/memories" \ **Request Example**: ```bash -curl -X POST "http://localhost:8000/api/v1/agents/agent-456/memories" -H "X-API-Key: test-api-key-123" -H "Content-Type: application/json" -d '{ +curl -X POST "http://localhost:8848/api/v1/agents/agent-456/memories" -H "X-API-Key: test-api-key-123" -H "Content-Type: application/json" -d '{ "content": "Agent learned new conversation techniques", "user_id": "user-123", "run_id": "run-789" @@ -1715,7 +1715,7 @@ curl -X POST "http://localhost:8000/api/v1/agents/agent-456/memories" -H "X-AP **Request Example**: ```bash -curl -X GET "http://localhost:8000/api/v1/agents/agent-456/memories?limit=20&offset=0" \ +curl -X GET "http://localhost:8848/api/v1/agents/agent-456/memories?limit=20&offset=0" \ -H "X-API-Key: test-api-key-123" ``` @@ -1764,7 +1764,7 @@ curl -X GET "http://localhost:8000/api/v1/agents/agent-456/memories?limit=20&off ```bash # Share all memories -curl -X POST "http://localhost:8000/api/v1/agents/agent-456/memories/share" \ +curl -X POST "http://localhost:8848/api/v1/agents/agent-456/memories/share" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -1772,7 +1772,7 @@ curl -X POST "http://localhost:8000/api/v1/agents/agent-456/memories/share" \ }' # Share specific memories -curl -X POST "http://localhost:8000/api/v1/agents/agent-456/memories/share" \ +curl -X POST "http://localhost:8848/api/v1/agents/agent-456/memories/share" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -1816,7 +1816,7 @@ curl -X POST "http://localhost:8000/api/v1/agents/agent-456/memories/share" \ **Request Example**: ```bash -curl -X GET "http://localhost:8000/api/v1/agents/agent-789/memories/share?limit=20&offset=0" \ +curl -X GET "http://localhost:8848/api/v1/agents/agent-789/memories/share?limit=20&offset=0" \ -H "X-API-Key: test-api-key-123" ``` @@ -1873,7 +1873,7 @@ curl -X GET 
"http://localhost:8000/api/v1/agents/agent-789/memories/share?limit= ```bash # No API Key -curl -X GET "http://localhost:8000/api/v1/memories" +curl -X GET "http://localhost:8848/api/v1/memories" # Response { @@ -1901,7 +1901,7 @@ curl -X GET "http://localhost:8000/api/v1/memories" ```bash # Send 200 requests quickly for i in {1..200}; do - curl -X GET "http://localhost:8000/api/v1/memories" \ + curl -X GET "http://localhost:8848/api/v1/memories" \ -H "X-API-Key: test-api-key-123" & done @@ -1932,7 +1932,7 @@ done ```bash # Missing required field -curl -X POST "http://localhost:8000/api/v1/memories" \ +curl -X POST "http://localhost:8848/api/v1/memories" \ -H "X-API-Key: test-api-key-123" \ -H "Content-Type: application/json" \ -d '{ @@ -1980,11 +1980,11 @@ Use tools to measure endpoint response times: ```bash # Using curl to measure response time -time curl -X GET "http://localhost:8000/api/v1/memories" \ +time curl -X GET "http://localhost:8848/api/v1/memories" \ -H "X-API-Key: test-api-key-123" # Using httpie -http --timeout=5 GET "http://localhost:8000/api/v1/memories" \ +http --timeout=5 GET "http://localhost:8848/api/v1/memories" \ X-API-Key:test-api-key-123 ``` @@ -1997,11 +1997,11 @@ Use tools for concurrent load testing: ```bash # Using Apache Bench ab -n 1000 -c 10 -H "X-API-Key: test-api-key-123" \ - http://localhost:8000/api/v1/memories + http://localhost:8848/api/v1/memories # Using wrk wrk -t4 -c100 -d30s -H "X-API-Key: test-api-key-123" \ - http://localhost:8000/api/v1/memories + http://localhost:8848/api/v1/memories ``` --- diff --git a/docs/guides/0001-getting_started.md b/docs/guides/0001-getting_started.md index 33550dd0..cda58afe 100644 --- a/docs/guides/0001-getting_started.md +++ b/docs/guides/0001-getting_started.md @@ -590,4 +590,27 @@ else: print(f"✗ Failed to delete memories") ``` -> **Warning:** `delete_all()` permanently removes all memories for the specified user. This action cannot be undone. 
Use with extreme caution in production environments. Always implement proper authorization checks and user confirmation before allowing this operation. \ No newline at end of file +> **Warning:** `delete_all()` permanently removes all memories for the specified user. This action cannot be undone. Use with extreme caution in production environments. Always implement proper authorization checks and user confirmation before allowing this operation. + +## Ecosystem Integrations + +Beyond the Python SDK, PowerMem ships first-party integrations that give existing +AI clients and IDEs persistent memory — all pointing at the same backend (the +HTTP API server or the local `pmem` CLI), with no per-client schema rewrites. + +- **[Claude Code](../integrations/claude_code.md)** — Connect Claude Code to + PowerMem via the `memory-powermem` plugin. The default **HTTP mode** captures + every session silently through hooks (`SessionEnd` / `PostCompact` write the + transcript, `UserPromptSubmit` injects relevant memories per turn); an optional + **MCP mode** adds in-chat `search_memories` / `add_memory` tools. No Python is + required on the Claude Code machine. + +See **[Ecosystem Integrations](../integrations/overview.md)** for the full list, +and the **[Integrations Guide](./0009-integrations.md)** for framework wiring +(LangChain, LangGraph, FastAPI, custom providers). + +## Next Steps + +- [Configuration Guide](./0003-configuration.md) — providers, storage backends, environment variables +- [CLI Usage Guide](./0012-cli_usage.md) — the `pmem` command and interactive shell +- [Ecosystem Integrations](../integrations/overview.md) — Claude Code and other AI clients \ No newline at end of file diff --git a/docs/guides/overview.md b/docs/guides/overview.md index da9bb2d0..6d13162e 100644 --- a/docs/guides/overview.md +++ b/docs/guides/overview.md @@ -8,13 +8,21 @@ Comprehensive guides for using powermem effectively. 
- **[Configuration Guide](./0003-configuration.md)** - Configuration options - **[Custom Prompts Usage](./0004-custom_prompts_usage.md)** - Customize prompts for memory processing - **[Multi-Agent Guide](./0005-multi_agent.md)** - Multi-agent scenarios -- **[Integrations Guide](./0009-integrations.md)** - Third-party integrations +- **[Integrations Guide](./0009-integrations.md)** - Third-party integrations (LangChain, LangGraph, FastAPI, custom providers) - **[Sub Stores Guide](./0006-sub_stores.md)** - Advanced sub-store configuration - **[Multimodal Capability](../examples/scenario_7_multimodal.md)** - Image and multimedia processing - **[UserMemory Guide](./0010-user_memory.md)** - User profile management and extraction - **[Sparse Vector Guide](./0011-sparse_vector.md)** - Sparse vector configuration, upgrade and migration - **[CLI Usage Guide](./0012-cli_usage.md)** - PowerMem CLI (pmem) usage and reference (1.0.0+) +## Ecosystem Integrations + +First-party integrations that give existing AI clients and IDEs persistent +memory — all pointing at the same backend, with no per-client schema rewrites. + +- **[Ecosystem Integrations](../integrations/overview.md)** - Overview of all AI-client & IDE integrations +- **[Claude Code](../integrations/claude_code.md)** - `memory-powermem` plugin: silent HTTP-mode capture via hooks, optional MCP mode for in-chat tools + ## Other Notes - **[Other Notes](1000-other_notes.md)** - Important notes and usage guidelines @@ -26,6 +34,7 @@ Comprehensive guides for using powermem effectively. 
- **Multi-agent?** See [Multi-Agent Guide](./0005-multi_agent.md) - **Advanced features?** Check [Sub Stores Guide](./0006-sub_stores.md) - **Integrations?** Check [Integrations Guide](./0009-integrations.md) +- **Using Claude Code / an IDE?** See [Ecosystem Integrations](../integrations/overview.md) - **Multimodal?** See [Multimodal Capability](../examples/scenario_7_multimodal.md) - **User profiles?** See [UserMemory Guide](./0010-user_memory.md) - **Sparse vector?** See [Sparse Vector Guide](./0011-sparse_vector.md) diff --git a/docs/integrations/claude_code.md b/docs/integrations/claude_code.md new file mode 100644 index 00000000..aa96718a --- /dev/null +++ b/docs/integrations/claude_code.md @@ -0,0 +1,303 @@ +# Claude Code + +Give [Claude Code](https://code.claude.com) persistent, self-evolving memory through the first-party plugin (`memory-powermem`, under [`apps/claude-code-plugin/`](https://github.com/oceanbase/powermem/tree/main/apps/claude-code-plugin/)). + +This page is the single source of truth for the Claude Code integration — the plugin's own [`README.md`](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/README.md) links here. + +## Fastest path — let Claude Code set itself up + +Open Claude Code in your terminal and paste this one line: + +```text +Read and follow apps/claude-code-plugin/SETUP.md to set up PowerMem memory for Claude Code. +``` + +Claude Code reads [`apps/claude-code-plugin/SETUP.md`](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/SETUP.md) — the canonical automated-setup prompt — which detects whether you are in the PowerMem **source tree** (developer) or anywhere else (**pip user**), asks you for the few required secrets, and wires everything up end-to-end. + +Prefer to wire it by hand? The full plugin reference below covers every option. + +--- + +## Features + +- **Two connection modes** (aligned with the PowerMem VS Code extension). 
**HTTP mode is the default** (standard): REST-only via hooks, no PowerMem MCP tools in chat. **MCP mode** is optional when you want `search_memories` / `add_memory` in the conversation. See [Configuration](#configuration). +- **HTTP mode (default)**: Root `.mcp.json` ships with empty `mcpServers`. Hooks use **`POST /api/v1/memories`** (`POWERMEM_BASE_URL`, default `http://localhost:8848`). +- **MCP mode (optional)**: Copy [`config/mcp-mode.mcp.json`](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/config/mcp-mode.mcp.json) to `.mcp.json` (or run `apply-connection-mode.sh mcp`). Claude gets PowerMem tools over **HTTP** `…/mcp` or **stdio**. +- **Skills**: `/memory-powermem:remember` and `/memory-powermem:recall` — effective in **MCP mode**; in default HTTP mode they cannot drive tools. +- **Seamless REST capture**: Hooks run in **both** modes. Optional **file poller** — see [watcher/README.md](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/watcher/README.md). +- **Auto-retrieval (no MCP required, on by default)**: The `UserPromptSubmit` hook calls **`POST /api/v1/memories/search`** with the user’s prompt and injects hits via [`additionalContext`](https://code.claude.com/docs/en/hooks#userpromptsubmit). Set **`POWERMEM_PROMPT_SEARCH=0`** (or `false` / `no` / `off`) to disable — saves a search round-trip per turn. Works in **HTTP and MCP** modes. + +## Runtime requirements (end users) + +| Piece | Needs Python? | Notes | +|--------|----------------|-------| +| Claude Code | No | | +| MCP tools | No | **Off by default** (HTTP mode). Run `apply-connection-mode.sh mcp` to enable. | +| **Hooks** (transcript / compact → HTTP API) | **No** | Native binaries under `hooks/bin/` + `run-hook.sh` (macOS/Linux) or PowerShell on Windows. 
**`POWERMEM_BASE_URL` defaults to `http://localhost:8848`.** | +| Optional **file poller** | No | Same binary: `sh hooks/run-hook.sh poll` — see [watcher/README.md](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/watcher/README.md). | + +**macOS / Linux:** default `hooks/hooks.json` runs `sh …/run-hook.sh`. POSIX `sh` is always present. + +**Windows (native, no Git Bash):** if `sh` is missing, merge the commands from [`hooks/hooks.windows.example.json`](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/hooks/hooks.windows.example.json) into your Claude `settings.json` so hooks call `powershell.exe -File …/run-hook.ps1`. The zip includes `hooks/bin/powermem-hook-windows-amd64.exe` (add `windows/arm64` to the build script if you need it). + +**Rebuilding binaries** (developers / CI): Go **1.22+**, then `bash scripts/build-hook-binaries.sh` or `make build-claude-hook` from the repo root. `make package-claude-plugin` builds them automatically before zipping. + +## Prerequisites + +1. **PowerMem HTTP API** reachable from the machine running Claude (e.g. `powermem-server --port 8848`). Default hooks use **`http://localhost:8848`** — override with `POWERMEM_BASE_URL` for a remote server. +2. **MCP mode only:** additionally expose MCP (same host, usually `/mcp`) or stdio `powermem-mcp`, and switch `.mcp.json` via [`config/mcp-mode.mcp.json`](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/config/mcp-mode.mcp.json). +3. **Claude Code** (VS Code extension or CLI) with plugin support. + +## Manual Installation + +Set up the integration **from source** — this is **HTTP mode** (the default): hooks push transcripts to the REST API and inject search results per turn, with no in-chat tools. 
+ +### Step 1 — Download the source + +```bash +git clone https://github.com/oceanbase/powermem +cd powermem +``` + +### Step 2 — Configure `.env` + +Copy the template and set your **LLM API key** — the only required credential. Storage defaults to the embedded **seekdb** (no separate database), and the embedder to a local `all-MiniLM-L6-v2` model (no API key, auto-downloaded on first use). + +```bash +cp .env.example .env +# then edit .env and set at least: +# LLM_PROVIDER=anthropic # or openai / qwen / ... +# LLM_API_KEY=sk-... +# LLM_MODEL=claude-3-5-sonnet-latest +``` + +Every available setting is documented under [Configuration](#configuration); `pmem config init` can also generate `.env` interactively. + +### Step 3 — Install PowerMem and build the hook binaries + +`pip install -e .` provides the `powermem-server` and `pmem` commands; `make build-claude-hook` compiles the native Go hook binaries (requires **Go 1.22+**): + +```bash +pip install -e . +make build-claude-hook # outputs apps/claude-code-plugin/hooks/bin/ +``` + +### Step 4 — Start the HTTP API server + +Hooks default to `http://localhost:8848`. Leave this running (or start it as a background service): + +```bash +powermem-server --host 0.0.0.0 --port 8848 +``` + +### Step 5 — Load the plugin into Claude Code + +```bash +claude --plugin-dir "$(pwd)/apps/claude-code-plugin" +``` + +### Step 6 — Verify + +End the session (or run `/compact`), then look for `POST /api/v1/memories` in the server log; run `/hooks` inside Claude Code to confirm the entries are registered. See [Troubleshooting](#troubleshooting-no-requests-while-vibe-coding) if nothing shows up. 
+ +--- + +### Other ways to load the plugin + +#### Option A: Load from directory (development) + +```bash +claude --plugin-dir /path/to/powermem/apps/claude-code-plugin +``` + +#### Option B: Install from marketplace (not yet available) + +This plugin is **not yet published** to a Claude Code plugin marketplace — use `--plugin-dir` (Option A) for now. (Note: the `memory-powermem` listing you may find in plugin directories is the [OpenClaw plugin](https://github.com/ob-labs/memory-powermem), a different ecosystem.) Once a marketplace release is available, you will be able to install it from there. + +#### Option C: Pack and copy to another machine (offline / internal) + +From the **powermem repo root**: + +```bash +make package-claude-plugin +``` + +Or run the script directly: + +```bash +bash apps/claude-code-plugin/scripts/package-plugin.sh +``` + +This writes **`apps/claude-code-plugin/dist/powermem-claude-code-plugin-<version>.zip`**. Share that zip (USB, internal artifact server, etc.). + +**On the other computer:** + +1. Unzip → you get a folder `powermem-claude-code-plugin/` containing `.claude-plugin/`, `hooks/`, `skills/`, `.mcp.json`, etc. +2. Point Claude Code at that folder (absolute path recommended): + + ```bash + # Optional: hooks default to http://localhost:8848 if POWERMEM_BASE_URL is unset + export POWERMEM_BASE_URL=https://your-team-powermem.example.com # team server only + claude --plugin-dir /path/to/powermem-claude-code-plugin + ``` + +3. Requirements on that machine: **no Python**; use **macOS/Linux** `sh` or follow **Windows** PowerShell hooks above. **HTTP API** must be reachable for hooks (and `/mcp` too if you enable MCP mode). + +To publish a zip **with MCP enabled by default**, replace root `.mcp.json` with `config/mcp-mode.mcp.json` before `make package-claude-plugin`, or document that users run `apply-connection-mode.sh mcp`. 
+ +## Uninstall and update + +### Uninstall + +How you remove the plugin depends on how you enabled it: + +| How you installed | What to do | +|-------------------|------------| +| **`claude --plugin-dir /path/to/...`** | Stop passing `--plugin-dir` (remove it from shell aliases, scripts, or IDE task). Optionally delete the plugin folder. Nothing is left in `~/.claude` **unless** you also changed global settings (see below). | +| **Zip / copied folder** | Delete the unzipped directory. Stop using `--plugin-dir` pointing at it. | +| **Git clone / repo path** | Stop using `--plugin-dir` for that path; remove the clone if you no longer need it. | +| **Marketplace / built-in plugin UI** *(not yet available)* | Reserved for when the plugin is published to a Claude Code marketplace: you would disable or uninstall **memory-powermem** in Claude Code’s plugin settings ([Claude Code plugins](https://code.claude.com/docs/en/plugins)). Today the plugin is loaded via `--plugin-dir`, so use the rows above. | +| **You merged [`hooks/hooks.windows.example.json`](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/hooks/hooks.windows.example.json) into `settings.json`** | Edit `~/.claude/settings.json` or `.claude/settings.json` in the project and remove the `UserPromptSubmit` / `SessionEnd` / `PostCompact` hook entries that call `run-hook.ps1` (or restore a backup). Otherwise hooks keep running even after the plugin folder is deleted. | + +The hook binary only sends HTTP requests to your PowerMem server (memory **writes** and, by default, per-prompt **searches**); it does not install a system daemon. No separate “service uninstall” is required. + +### Update + +| Install style | Update steps | +|---------------|--------------| +| **Zip** | Download the new `.zip`, replace the old folder (delete the previous `powermem-claude-code-plugin` tree, unzip the new one to the same or a new path), then start Claude with `--plugin-dir` pointing at the new folder. 
| +| **Repo / `git`** | `git pull` (or fetch the release you want), run `make package-claude-plugin` or `bash scripts/package-plugin.sh` if you need a fresh zip, then restart Claude Code. | +| **Marketplace** *(not yet available)* | Once published, you would “update” / reinstall from the marketplace when a new version ships. Until then, update via the **Repo / `git`** row above. | + +After updating, restart the Claude Code session (or the whole app) so MCP config, skills, and hooks reload. + +## Configuration + +### Two PowerMem modes (HTTP default, MCP optional) + +Same **MCP / HTTP** split as elsewhere in PowerMem. **Standard shipping = HTTP mode**: root `.mcp.json` has **`mcpServers: {}`**. **Hooks always use REST** in both modes. + +| Mode | Plugin root `.mcp.json` | Claude in-chat | Silent capture (hooks → REST) | +|------|-------------------------|----------------|--------------------------------| +| **HTTP mode (default)** | Empty `mcpServers` — same as [`config/http-mode.mcp.json`](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/config/http-mode.mcp.json) | No PowerMem MCP tools | Yes (`POWERMEM_BASE_URL`, default `http://localhost:8848`) | +| **MCP mode** | Includes `powermem` — [`config/mcp-mode.mcp.json`](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/config/mcp-mode.mcp.json) | Yes — `search_memories`, `add_memory`, … | Yes | + +**Switch mode** (from the plugin directory): + +```bash +bash scripts/apply-connection-mode.sh http # restore standard (default) HTTP-only mode +bash scripts/apply-connection-mode.sh mcp # enable in-chat PowerMem tools +``` + +Restart Claude Code after changing `.mcp.json`. See [`config/README.md`](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/config/README.md). 
+ +**Naming note:** In **MCP mode**, `transport: "http"` means “connect to the **MCP** endpoint over HTTP” (`https://host/mcp`), not “replace MCP with REST.” **HTTP mode** means “no MCP entry for PowerMem”; REST is still used by hooks. + +### MCP mode: team or local URL + +After `apply-connection-mode.sh mcp`, edit `.mcp.json` or `config/mcp-mode.mcp.json` before copying. Same host as your REST API, MCP path is usually `/mcp`: + +```json +{ + "mcpServers": { + "powermem": { + "transport": "http", + "url": "https://powermem.example.com/mcp" + } + } +} +``` + +**stdio MCP** (local `powermem-mcp` process) — in **MCP mode**, replace the `powermem` block with: + +```json +{ + "mcpServers": { + "powermem": { + "transport": "stdio", + "command": "uvx", + "args": ["powermem-mcp", "stdio"] + } + } +} +``` + +Ensure PowerMem is installed (`pip install powermem`) and a `.env` is available when using stdio. + +### HTTP mode: REST only (standard) + +This is the **default** root `.mcp.json`. Claude has **no** PowerMem MCP tools; skills that reference those tools have nothing to call. **Hooks** still send transcripts / compact summaries to `POST /api/v1/memories`. To reset after trying MCP: `bash scripts/apply-connection-mode.sh http`. + +### Seamless recording (hooks + HTTP API) + +The plugin ships [`hooks/hooks.json`](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/hooks/hooks.json), [`hooks/run-hook.sh`](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/hooks/run-hook.sh), and **native** `hooks/bin/powermem-hook-*` (built from [`cmd/powermem-hook`](https://github.com/oceanbase/powermem/tree/main/apps/claude-code-plugin/cmd/powermem-hook/)). 
When the plugin is enabled, Claude Code merges these hooks: + +| Hook | What happens | +|------|----------------| +| `UserPromptSubmit` | By default, **`POST …/api/v1/memories/search`** with the submitted `prompt`; top results are injected as **additional context** for that turn ([Claude Code hooks](https://code.claude.com/docs/en/hooks#userpromptsubmit)). Set **`POWERMEM_PROMPT_SEARCH=0`** (or `false` / `no` / `off`) to skip search (hook still registered; overhead is small when disabled). | +| `SessionEnd` | Full **transcript** from `transcript_path` (parsed JSONL: user/assistant/summary lines) → **`POST …/api/v1/memories`**. | +| `PostCompact` | The **`compact_summary`** field after `/compact` or auto-compact → **`POST …/api/v1/memories`**. | + +**Write** hooks use `POST {POWERMEM_BASE_URL}/api/v1/memories`. **Prompt search** uses `POST {POWERMEM_BASE_URL}/api/v1/memories/search`. Neither path requires MCP. + +Optional environment variables (where you launch Claude Code): + +| Variable | Required | Description | +|----------|----------|-------------| +| `POWERMEM_BASE_URL` | No | Defaults to **`http://localhost:8848`** (same host as default `.mcp.json`, without `/mcp`). Set for a team gateway, e.g. `https://powermem.example.com`. | +| `POWERMEM_API_KEY` | If server uses auth | Sent as `X-API-Key` | +| `POWERMEM_USER_ID` | No | Defaults to OS login name | +| `POWERMEM_AGENT_ID` | No | Optional `agent_id` on memories | +| `POWERMEM_HOOK_MAX_CHARS` | No | Transcript cap (default `120000`) | +| `POWERMEM_INFER_TRANSCRIPT` | No | Set `1` to enable server-side infer on large transcripts (default off) | +| `POWERMEM_INFER_COMPACT` | No | Set `0` to disable infer on compact summaries (default on) | +| `POWERMEM_PROMPT_SEARCH` | No | **Default: on** — injects semantic search results on every user prompt via `UserPromptSubmit`. Set **`0`** / **`false`** / **`no`** / **`off`** to disable. 
| +| `POWERMEM_PROMPT_SEARCH_LIMIT` | No | Max memories returned per prompt (default **8**, cap **30**). | +| `POWERMEM_PROMPT_SEARCH_MAX_CHARS` | No | Cap on injected context string (default **24000**). | + +**SessionEnd timeout:** Claude Code defaults to a short timeout for `SessionEnd` hooks. The hook **returns immediately** and uploads in a **detached worker process**, so large transcripts still upload without blocking exit. If you ever switch to a synchronous upload inside the hook, raise `CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS` (see [Claude Code hooks – SessionEnd](https://code.claude.com/docs/en/hooks#sessionend)). + +### Troubleshooting: “no requests” while vibe-coding + +What you see is often **expected**: + +1. **Default HTTP mode** — There are **no** PowerMem MCP tools during chat, so Claude does **not** call `/mcp` on each message. **`POST /api/v1/memories`** (writes) still come from **`SessionEnd`** / **`PostCompact`**, not every reply. By default, **`POST /api/v1/memories/search`** runs **on each user message** via `UserPromptSubmit`; set **`POWERMEM_PROMPT_SEARCH=0`** to turn that off. +2. **Not every hook is per-turn** — `SessionEnd` runs when the **session ends** (quit, `/clear`, `/resume` switch, etc.). `PostCompact` runs after **manual or auto compact**, not after every reply. +3. **Those GETs** (`/system/status`, `/memories/stats`, …) usually come from another client (e.g. **PowerMem VS Code extension** dashboard), not from Claude Code hooks. + +**How to verify hooks:** + +- **End the Claude Code session** (exit the CLI session that used `--plugin-dir`), then check server logs for **`POST /api/v1/memories`** (the worker runs shortly after exit). +- Or trigger **`/compact`** (or wait for auto-compact) and look for a compact-summary write. 
+- In Claude Code, type **`/hooks`** and confirm `UserPromptSubmit` (if present) / `SessionEnd` / `PostCompact` list this plugin’s command (see [hooks menu](https://code.claude.com/docs/en/hooks#the-hooks-menu)). + +**If you want traffic during the conversation:** + +- **`POWERMEM_PROMPT_SEARCH` is on by default**, so each user message triggers **`POST /api/v1/memories/search`** and retrieved memories are **injected automatically** (no MCP tools needed). Set **`POWERMEM_PROMPT_SEARCH=0`** to turn that off. +- Or switch to **MCP mode** (`bash scripts/apply-connection-mode.sh mcp`) so Claude can call memory tools when it chooses — traffic goes to **`/mcp`**, not necessarily the same paths as the dashboard GETs. +- Or rely on **VS Code extension** save capture / `sh hooks/run-hook.sh poll` for file-based writes. + +### Optional: workspace file watcher (CLI / no VS Code) + +If engineers use **Claude Code without** the [PowerMem VS Code extension](https://github.com/oceanbase/powermem/tree/main/apps/vscode-extension/) (which already **auto-captures on save** against `powermem.backendUrl`), run the native poller: + +```bash +export POWERMEM_BASE_URL=https://powermem.example.com +export POWERMEM_API_KEY=... # if required +export POWERMEM_WATCH_ROOT=/path/to/repo +sh hooks/run-hook.sh poll +``` + +See [watcher/README.md](https://github.com/oceanbase/powermem/blob/main/apps/claude-code-plugin/watcher/README.md) for environment variables. + +## Usage + +- **Default (HTTP mode):** Hooks capture to REST automatically; no PowerMem tools in chat. **Per-prompt semantic retrieval is on by default** (see [Seamless recording](#seamless-recording-hooks--http-api)); set **`POWERMEM_PROMPT_SEARCH=0`** to disable. +- **MCP mode:** Run `apply-connection-mode.sh mcp`, then PowerMem tools appear; use **/memory-powermem:remember** / **recall** with real tool backing. Per-prompt injection stays **on by default**; set **`POWERMEM_PROMPT_SEARCH=0`** if you only want explicit MCP tool use. 
+- In **both** modes, transcript/compact hooks write to REST (`POWERMEM_BASE_URL`, default `http://localhost:8848`) without the model calling tools. + +## Links + +- [PowerMem](https://github.com/oceanbase/powermem) +- [PowerMem MCP docs](../api/0004-mcp.md) +- [Claude Code hooks reference](https://code.claude.com/docs/en/hooks) diff --git a/docs/integrations/overview.md b/docs/integrations/overview.md new file mode 100644 index 00000000..31ffb429 --- /dev/null +++ b/docs/integrations/overview.md @@ -0,0 +1,22 @@ +# Ecosystem Integrations + +First-party integrations that connect PowerMem to AI clients, IDEs, and agent +frameworks. Every integration points at the same backend (the HTTP API server or +the local `pmem` CLI) — there are no per-client schema rewrites. + +## AI clients & IDEs + +- **[Claude Code](./claude_code.md)** — Plugin (`memory-powermem`) with silent + HTTP-mode capture via hooks and an optional MCP mode for in-chat + `search_memories` / `add_memory` tools. + +## Frameworks & SDKs + +For LangChain, LangGraph, FastAPI, and custom LLM / embedding / storage +providers, see the **[Integrations Guide](../guides/0009-integrations.md)**. 
+ +## See also + +- [Getting Started](../guides/0001-getting_started.md) — install, `.env`, first `Memory` usage +- [MCP API](../api/0004-mcp.md) — Model Context Protocol server +- [HTTP API Server](../api/0005-api_server.md) — REST endpoints used by the integrations diff --git a/docs/website/sidebars.ts b/docs/website/sidebars.ts index ebf73c3f..bd7a7467 100644 --- a/docs/website/sidebars.ts +++ b/docs/website/sidebars.ts @@ -12,6 +12,16 @@ const sidebars: SidebarsConfig = { }, ], }, + { + type: 'category', + label: 'Integrations', + items: [ + { + type: 'autogenerated', + dirName: 'integrations', + }, + ], + }, { type: 'category', label: 'API Reference', diff --git a/examples/README.md b/examples/README.md index a5be4ad9..866fc7be 100644 --- a/examples/README.md +++ b/examples/README.md @@ -107,9 +107,9 @@ This directory contains various examples demonstrating how to use powermem with # Go Client Example (requires Go 1.21+ and running PowerMem server) cd examples/go - go run . # Uses default http://localhost:8000 + go run . # Uses default http://localhost:8848 # Or with custom config: - POWERMEM_BASE_URL=http://localhost:8000 POWERMEM_API_KEY=your-key go run . + POWERMEM_BASE_URL=http://localhost:8848 POWERMEM_API_KEY=your-key go run . ``` ## Database Backends diff --git a/examples/go/README.md b/examples/go/README.md index 557cad67..95f01125 100644 --- a/examples/go/README.md +++ b/examples/go/README.md @@ -20,7 +20,7 @@ A simple, lightweight Go client example demonstrating how to integrate PowerMem' ```bash pip install powermem -powermem-server --host 0.0.0.0 --port 8000 +powermem-server --host 0.0.0.0 --port 8848 ``` #### Configure API Keys (Required) @@ -55,12 +55,12 @@ POWERMEM_SERVER_API_KEYS=your-api-key-123,another-key-456 ```bash cd examples/go -# Run with default settings (localhost:8000, no auth) +# Run with default settings (localhost:8848, no auth) go run . 
# Or with custom configuration # Base URL of the PowerMem API server -export POWERMEM_BASE_URL=http://localhost:8000 +export POWERMEM_BASE_URL=http://localhost:8848 # API key for authentication (if server auth enabled) export POWERMEM_API_KEY=your-api-key-123 go run . @@ -73,7 +73,7 @@ go run . Check the health status of the PowerMem API server. This is a public endpoint that does not require authentication. ```go -client := NewClient("http://localhost:8000", "your-api-key") +client := NewClient("http://localhost:8848", "your-api-key") health, err := client.Health() fmt.Printf("Status: %s\n", health.Status) ``` diff --git a/examples/go/client.go b/examples/go/client.go index c56af176..cc8cc4ce 100644 --- a/examples/go/client.go +++ b/examples/go/client.go @@ -19,7 +19,7 @@ import ( // Client is a PowerMem API client. type Client struct { // BaseURL is the base URL of the PowerMem API server. - // Example: "http://localhost:8000" + // Example: "http://localhost:8848" BaseURL string // APIKey is the API key for authentication. 
diff --git a/examples/go/main.go b/examples/go/main.go index b17a9c5b..153e3c7d 100644 --- a/examples/go/main.go +++ b/examples/go/main.go @@ -14,7 +14,7 @@ // // Environment variables: // -// POWERMEM_BASE_URL - Base URL of the PowerMem API server (default: http://localhost:8000) +// POWERMEM_BASE_URL - Base URL of the PowerMem API server (default: http://localhost:8848) // POWERMEM_API_KEY - API key for authentication (optional if auth is disabled) package main @@ -47,7 +47,7 @@ func main() { func initClient() *Client { baseURL := os.Getenv("POWERMEM_BASE_URL") if baseURL == "" { - baseURL = "http://localhost:8000" + baseURL = "http://localhost:8848" } apiKey := os.Getenv("POWERMEM_API_KEY") diff --git a/examples/moonbit/README.md b/examples/moonbit/README.md index 62f38a41..40417b24 100644 --- a/examples/moonbit/README.md +++ b/examples/moonbit/README.md @@ -20,7 +20,7 @@ A simple, lightweight MoonBit client example demonstrating how to integrate Powe ```bash pip install powermem -powermem-server --host 0.0.0.0 --port 8000 +powermem-server --host 0.0.0.0 --port 8848 ``` #### Configure API Keys (Required) @@ -55,13 +55,13 @@ POWERMEM_SERVER_API_KEYS=your-api-key-123,another-key-456 ```bash cd examples/moonbit -# Run with default settings (localhost:8000, no auth). +# Run with default settings (localhost:8848, no auth). # `moon run` fetches dependencies on first invocation. moon run --target native . # Or with custom configuration # Base URL of the PowerMem API server -export POWERMEM_BASE_URL=http://localhost:8000 +export POWERMEM_BASE_URL=http://localhost:8848 # API key for authentication (if server auth enabled) export POWERMEM_API_KEY=your-api-key-123 moon run --target native . @@ -74,7 +74,7 @@ moon run --target native . Check the health status of the PowerMem API server. This is a public endpoint that does not require authentication. 
```moonbit -let client = Client::new("http://localhost:8000", api_key="your-api-key") +let client = Client::new("http://localhost:8848", api_key="your-api-key") let health = client.health() println("Status: \{health.status}") ``` diff --git a/pyproject.toml b/pyproject.toml index 88fb1fbe..16f7e662 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ dependencies = [ "dashscope>=1.14.0", "psutil>=5.9.0", "zai-sdk>=0.2.0", + "pyseekdb>=1.3.0", ] [project.optional-dependencies] @@ -66,9 +67,9 @@ mcp = [ "fastmcp>=1.0", "uvicorn>=0.27.1", ] -seekdb = [ - "pyseekdb>=0.1.0", -] +# pyseekdb is now a core dependency (it backs the zero-config default embedder), +# so this extra is kept only for backward compatibility with `powermem[seekdb]`. +seekdb = [] extras = [ "sentence-transformers>=5.0.0", ] diff --git a/src/powermem/cli/commands/config.py b/src/powermem/cli/commands/config.py index d3555800..48b5ff85 100644 --- a/src/powermem/cli/commands/config.py +++ b/src/powermem/cli/commands/config.py @@ -830,12 +830,12 @@ def _wizard_database(existing: Dict[str, str]) -> Dict[str, str]: if provider == "oceanbase": updates["OCEANBASE_HOST"] = click.prompt( - "OceanBase host (empty for embedded SeekDB)", + "OceanBase host (empty for embedded seekdb)", default=existing.get("OCEANBASE_HOST") or "", show_default=True, ) updates["OCEANBASE_PATH"] = click.prompt( - "OceanBase embedded SeekDB path (used when host is empty)", + "OceanBase embedded seekdb path (used when host is empty)", default=existing.get("OCEANBASE_PATH") or "./seekdb_data", show_default=True, ) @@ -924,12 +924,12 @@ def _wizard_database_quickstart(existing: Dict[str, str]) -> Dict[str, str]: if provider == "oceanbase": updates["OCEANBASE_HOST"] = click.prompt( - "OceanBase host (empty for embedded SeekDB)", + "OceanBase host (empty for embedded seekdb)", default=existing.get("OCEANBASE_HOST") or "", show_default=True, ) updates["OCEANBASE_PATH"] = click.prompt( - "OceanBase embedded SeekDB path (used 
when host is empty)", + "OceanBase embedded seekdb path (used when host is empty)", default=existing.get("OCEANBASE_PATH") or "./seekdb_data", show_default=True, ) diff --git a/src/powermem/config_loader.py b/src/powermem/config_loader.py index 08d07e09..cf43b1e6 100644 --- a/src/powermem/config_loader.py +++ b/src/powermem/config_loader.py @@ -17,6 +17,7 @@ from powermem.integrations.embeddings.config.sparse_base import BaseSparseEmbedderConfig from powermem.integrations.llm.config.base import BaseLLMConfig from powermem.settings import _DEFAULT_ENV_FILE, settings_config +from powermem.utils.utils import detect_system_timezone def _load_dotenv_if_available() -> None: @@ -123,65 +124,66 @@ class DatabaseSettings(_BasePowermemSettings): model_config = settings_config() provider: str = Field( - default="sqlite", + default="oceanbase", validation_alias=AliasChoices("DATABASE_PROVIDER"), ) def to_config(self) -> Dict[str, Any]: """ Convert settings to VectorStore configuration dictionary. - + Provider-specific fields are automatically loaded from environment variables by the provider config class. """ from powermem.storage.config.base import BaseVectorStoreConfig - + db_provider = self.provider.lower() - + # Handle postgres alias if db_provider == "postgres": db_provider = "pgvector" - + # 1. Get provider config class from registry config_cls = ( BaseVectorStoreConfig.get_provider_config_cls(db_provider) or BaseVectorStoreConfig ) - + # 2. Create provider settings from environment variables # All provider-specific fields are loaded here automatically provider_settings = config_cls() - + # 3. Export to dict vector_store_config = provider_settings.model_dump(exclude_none=True) - + # 4. 
For OceanBase, build connection_args for backward compatibility if db_provider == "oceanbase": connection_args = {} for key in ["host", "port", "user", "password", "db_name", "ob_path"]: if key in vector_store_config: connection_args[key] = vector_store_config[key] - + # Only add connection_args if we have connection parameters if connection_args: vector_store_config["connection_args"] = connection_args - + return {"provider": db_provider, "config": vector_store_config} class LLMSettings(_BasePowermemSettings): """ Unified LLM configuration settings. - + This class provides a common interface for configuring LLM providers. It only contains fields that are common across all providers. Provider-specific fields (e.g., dashscope_base_url for Qwen) should be set via environment variables and will be loaded by the respective provider config classes. - + Design rationale: This follows the same pattern as EmbeddingSettings, keeping the unified settings simple and delegating provider-specific configuration to the provider config classes. """ + model_config = settings_config("LLM_") provider: str = Field(default="qwen") @@ -202,13 +204,13 @@ class LLMSettings(_BasePowermemSettings): def to_config(self) -> Dict[str, Any]: """ Convert settings to LLM configuration dictionary. - + This method: 1. Gets the appropriate provider config class 2. Creates an instance (loading provider-specific fields from environment) 3. Overrides with explicitly set common fields from this settings object 4. Returns the final configuration - + Provider-specific fields (e.g., dashscope_base_url, enable_search) are automatically loaded from environment variables by the provider config class. 
""" @@ -253,7 +255,10 @@ def to_config(self) -> Dict[str, Any]: class EmbeddingSettings(_BasePowermemSettings): model_config = settings_config("EMBEDDING_") - provider: str = Field(default="qwen") + # Default follows .env.example.full (EMBEDDING_PROVIDER=default): the + # built-in local all-MiniLM-L6-v2 embedder (384 dims, no API key), so + # PowerMem starts zero-config. Set EMBEDDING_PROVIDER to use a cloud one. + provider: str = Field(default="default") api_key: Optional[str] = Field(default=None) model: Optional[str] = Field(default=None) embedding_dims: Optional[int] = Field( @@ -337,7 +342,9 @@ def to_config(self) -> Dict[str, Any]: class TimezoneSettings(_BasePowermemSettings): model_config = settings_config() - timezone: str = Field(default="UTC") + # When TIMEZONE is unset, auto-detect the host machine's zone (falling back + # to UTC). An explicit TIMEZONE env var always overrides this default. + timezone: str = Field(default_factory=detect_system_timezone) def to_config(self) -> Dict[str, Any]: return self.model_dump() @@ -356,30 +363,30 @@ class RerankerSettings(_BasePowermemSettings): def to_config(self) -> Dict[str, Any]: """ Convert settings to Rerank configuration dictionary. - + This method: 1. Gets the appropriate provider config class 2. Creates an instance (loading provider-specific fields from environment) 3. Overrides with explicitly set fields from this settings object 4. Returns the final configuration - + Provider-specific fields (e.g., api_base_url) are automatically loaded from environment variables by the provider config class. """ from powermem.integrations.rerank.config.base import BaseRerankConfig - + rerank_provider = self.provider.lower() - + # 1. Get provider config class from registry config_cls = ( BaseRerankConfig.get_provider_config_cls(rerank_provider) or BaseRerankConfig # fallback to base config ) - + # 2. 
Create provider settings from environment variables # Provider-specific fields are automatically loaded here provider_settings = config_cls() - + # 3. Collect fields to override overrides = {} for field in ("enabled", "model", "api_key", "api_base_url", "top_n"): @@ -387,11 +394,11 @@ def to_config(self) -> Dict[str, Any]: value = getattr(self, field) if value is not None: overrides[field] = value - + # 4. Update configuration with overrides if overrides: provider_settings = provider_settings.model_copy(update=overrides) - + # 5. Export using to_component_dict() to match RerankConfig structure return provider_settings.to_component_dict() @@ -534,36 +541,36 @@ def to_config( ) -> Optional[Dict[str, Any]]: """ Convert settings to GraphStore configuration dictionary. - + Provider-specific fields are automatically loaded from environment variables by the provider config class (with fallback to VectorStore env vars). """ if not self.enabled: return None - + from powermem.storage.config.base import BaseGraphStoreConfig - + graph_provider = self.provider.lower() - + # 1. Get provider config class from registry config_cls = ( BaseGraphStoreConfig.get_provider_config_cls(graph_provider) or BaseGraphStoreConfig ) - + # 2. Create provider settings from environment variables provider_settings = config_cls() - + # 3. Export to dict graph_config = provider_settings.model_dump(exclude_none=True) - + # 4. Build final config graph_store_config = { "enabled": True, "provider": graph_provider, "config": graph_config, } - + # 5. 
Add custom prompts if configured if self.custom_prompt: graph_store_config["custom_prompt"] = self.custom_prompt @@ -598,7 +605,9 @@ def to_config(self) -> Dict[str, Any]: if self.update_memory_prompt: result["custom_update_memory_prompt"] = self.update_memory_prompt if self.importance_evaluation_prompt: - result["custom_importance_evaluation_prompt"] = self.importance_evaluation_prompt + result["custom_importance_evaluation_prompt"] = ( + self.importance_evaluation_prompt + ) return result @@ -649,14 +658,14 @@ def to_config(self) -> Dict[str, Any]: # Sync embedding_model_dims from embedder to vector_store and graph_store embedder_config = config.get("embedder", {}) embedder_dims = embedder_config.get("config", {}).get("embedding_dims") - + if embedder_dims is not None: # Sync to vector_store if not set vector_store_config = config.get("vector_store", {}) vector_store_inner_config = vector_store_config.get("config", {}) if vector_store_inner_config.get("embedding_model_dims") is None: vector_store_inner_config["embedding_model_dims"] = embedder_dims - + # Sync to graph_store if not set if graph_store_config: graph_store_inner_config = graph_store_config.get("config", {}) @@ -674,24 +683,24 @@ def load_config_from_env() -> Dict[str, Any]: This function reads configuration from environment variables and builds a config dictionary. You can use this when you have .env file set up to avoid manually building config dict. - + It automatically detects the database provider (sqlite, oceanbase, postgres) and builds the appropriate configuration. 
- + Returns: Configuration dictionary built from environment variables - + Example: ```python from dotenv import load_dotenv from powermem.config_loader import load_config_from_env - + # Load .env file load_dotenv() - + # Get config config = load_config_from_env() - + # Use config from powermem import Memory memory = Memory(config=config) @@ -704,7 +713,7 @@ def load_config_from_env() -> Dict[str, Any]: class CreateConfigOptions(BaseModel): model_config = ConfigDict(extra="forbid") - database_provider: str = "sqlite" + database_provider: str = "oceanbase" llm_provider: str = "qwen" embedding_provider: str = "qwen" database_config: Dict[str, Any] = Field(default_factory=dict) @@ -724,7 +733,7 @@ class CreateConfigOptions(BaseModel): def create_config( - database_provider: str = "sqlite", + database_provider: str = "oceanbase", llm_provider: str = "qwen", embedding_provider: str = "qwen", database_config: Optional[Dict[str, Any]] = None, @@ -747,7 +756,7 @@ def create_config( need a minimal manual config. Args: - database_provider: Database provider ('sqlite', 'oceanbase', 'postgres') + database_provider: Database provider ('oceanbase', 'sqlite', 'postgres') llm_provider: LLM provider ('qwen', 'openai', etc.) embedding_provider: Embedding provider ('qwen', 'openai', etc.) 
database_config: Vector store configuration dictionary @@ -762,22 +771,22 @@ def create_config( embedding_model: Embedding model name embedding_dims: Embedding vector dimensions embedding_extra: Provider-specific embedding configuration fields - + Returns: Configuration dictionary - + Example: ```python from powermem.config_loader import create_config from powermem import Memory - + config = create_config( - database_provider='sqlite', + database_provider='oceanbase', llm_provider='qwen', llm_api_key='your_key', llm_model='qwen-plus' ) - + memory = Memory(config=config) ``` """ @@ -830,11 +839,13 @@ def create_config( }, }, } - + # Sync embedding_model_dims from embedder to vector_store if not set if config["vector_store"]["config"].get("embedding_model_dims") is None: - config["vector_store"]["config"]["embedding_model_dims"] = options.embedding_dims - + config["vector_store"]["config"][ + "embedding_model_dims" + ] = options.embedding_dims + return config @@ -846,38 +857,38 @@ def validate_config(config: Dict[str, Any]) -> bool: Args: config: Configuration dictionary to validate - + Returns: True if valid, False otherwise - + Example: ```python from powermem.config_loader import load_config_from_env, validate_config - + config = load_config_from_env() if validate_config(config): print("Configuration is valid!") ``` """ - required_sections = ['vector_store', 'llm', 'embedder'] - + required_sections = ["vector_store", "llm", "embedder"] + for section in required_sections: if section not in config: return False - - if 'provider' not in config[section]: + + if "provider" not in config[section]: return False - - if 'config' not in config[section]: + + if "config" not in config[section]: return False - + return True def auto_config() -> Dict[str, Any]: """ Automatically load configuration from environment variables. - + This is the simplest way to get configuration. It automatically loads .env file and returns the config. 
@@ -885,14 +896,14 @@ def auto_config() -> Dict[str, Any]: Returns: Configuration dictionary from environment variables - + Example: ```python from powermem import Memory - + # Simplest way - just load from .env memory = Memory(config=auto_config()) - + # Or even simpler with create_memory() from powermem import create_memory memory = create_memory() # Auto loads from .env diff --git a/src/powermem/configs.py b/src/powermem/configs.py index d7fd8d31..aabcc7a8 100644 --- a/src/powermem/configs.py +++ b/src/powermem/configs.py @@ -9,14 +9,19 @@ from pydantic import BaseModel, Field, field_validator from powermem.integrations.embeddings.config.base import BaseEmbedderConfig -from powermem.integrations.embeddings.config.providers import OpenAIEmbeddingConfig +from powermem.integrations.embeddings.config.providers import ( + PyseekdbDefaultEmbeddingConfig, +) from powermem.integrations.embeddings.config.sparse_base import BaseSparseEmbedderConfig import powermem.integrations.embeddings.config.sparse_providers # noqa: F401 — ensures sparse provider registry is populated from powermem.integrations.llm.config.base import BaseLLMConfig from powermem.integrations.llm.config.qwen import QwenConfig from powermem.storage.config.base import BaseVectorStoreConfig, BaseGraphStoreConfig -from powermem.storage.config.sqlite import SQLiteConfig -from powermem.storage.config.oceanbase import OceanBaseGraphConfig +from powermem.storage.config.sqlite import SQLiteConfig # noqa: F401 — keeps SQLite provider registered +from powermem.storage.config.oceanbase import ( + OceanBaseConfig, + OceanBaseGraphConfig, # noqa: F401 — keeps OceanBase graph provider registered +) from powermem.integrations.rerank.config.base import BaseRerankConfig @@ -207,16 +212,26 @@ class MemoryConfig(BaseModel): """Main memory configuration class.""" vector_store: BaseVectorStoreConfig = Field( - description="Configuration for the vector store", - default_factory=SQLiteConfig, + description=( + "Configuration 
for the vector store. Defaults to the OceanBase " + "provider with an empty host, which boots embedded seekdb on " + "disk (no separate server) so PowerMem starts with zero ops; " + "set OCEANBASE_HOST to point at a remote OceanBase cluster, or " + "switch the provider to sqlite / postgres." + ), + default_factory=OceanBaseConfig, ) llm: BaseLLMConfig = Field( description="Configuration for the language model", default_factory=QwenConfig, ) embedder: BaseEmbedderConfig = Field( - description="Configuration for the embedding model", - default_factory=OpenAIEmbeddingConfig, + description=( + "Configuration for the embedding model. Defaults to the built-in local " + "all-MiniLM-L6-v2 model (384 dims) so PowerMem can start with zero " + "configuration; override to use OpenAI/Qwen/SiliconFlow/etc." + ), + default_factory=PyseekdbDefaultEmbeddingConfig, ) graph_store: Optional[BaseGraphStoreConfig] = Field( description="Configuration for the graph store (None means disabled)", diff --git a/src/powermem/core/memory.py b/src/powermem/core/memory.py index 4aac6fce..3afb041c 100644 --- a/src/powermem/core/memory.py +++ b/src/powermem/core/memory.py @@ -1263,7 +1263,7 @@ def search( # Intelligent plugin lifecycle management on search if self._intelligence_plugin and self._intelligence_plugin.enabled: updates, deletes = self._intelligence_plugin.on_search(processed_results) - # For embedded SeekDB the engine is single-threaded (NullPool, not + # For embedded seekdb the engine is single-threaded (NullPool, not # thread-safe). Background threads opening concurrent connections # crash the C++ layer. Run updates/deletes synchronously instead. 
_is_embedded_store = ( diff --git a/src/powermem/integrations/embeddings/config/__init__.py b/src/powermem/integrations/embeddings/config/__init__.py index 24d2625b..3c1c7b19 100644 --- a/src/powermem/integrations/embeddings/config/__init__.py +++ b/src/powermem/integrations/embeddings/config/__init__.py @@ -10,6 +10,7 @@ MockEmbeddingConfig, OllamaEmbeddingConfig, OpenAIEmbeddingConfig, + PyseekdbDefaultEmbeddingConfig, QwenEmbeddingConfig, SiliconFlowEmbeddingConfig, TogetherEmbeddingConfig, @@ -32,6 +33,7 @@ "MockEmbeddingConfig", "OllamaEmbeddingConfig", "OpenAIEmbeddingConfig", + "PyseekdbDefaultEmbeddingConfig", "QwenSparseEmbeddingConfig", "QwenEmbeddingConfig", "SiliconFlowEmbeddingConfig", diff --git a/src/powermem/integrations/embeddings/config/providers.py b/src/powermem/integrations/embeddings/config/providers.py index a0b84c0c..9830c76a 100644 --- a/src/powermem/integrations/embeddings/config/providers.py +++ b/src/powermem/integrations/embeddings/config/providers.py @@ -253,5 +253,24 @@ class MockEmbeddingConfig(BaseEmbedderConfig): model_config = settings_config("EMBEDDING_", extra="allow", env_file=None) +class PyseekdbDefaultEmbeddingConfig(BaseEmbedderConfig): + """Built-in default embedder (all-MiniLM-L6-v2, 384 dims). + + Requires no API key; runs locally via pyseekdb's ONNX-backed + ``DefaultEmbeddingFunction``. Selected automatically when no embedder is + configured, so PowerMem can start with zero configuration. 
+ """ + + _provider_name = "default" + _class_path = ( + "powermem.integrations.embeddings.pyseekdb_default.PyseekdbDefaultEmbedding" + ) + + model_config = settings_config("EMBEDDING_", extra="allow", env_file=None) + + model: Optional[str] = Field(default="all-MiniLM-L6-v2") + embedding_dims: Optional[int] = Field(default=384) + + class CustomEmbeddingConfig(BaseEmbedderConfig): model_config = settings_config("EMBEDDING_", extra="allow", env_file=None) diff --git a/src/powermem/integrations/embeddings/pyseekdb_default.py b/src/powermem/integrations/embeddings/pyseekdb_default.py new file mode 100644 index 00000000..fc6d492c --- /dev/null +++ b/src/powermem/integrations/embeddings/pyseekdb_default.py @@ -0,0 +1,216 @@ +"""Built-in default embedder. + +Wraps ``pyseekdb.client.embedding_function.DefaultEmbeddingFunction`` so PowerMem +can start with zero configuration and no external API key. The model is +``sentence-transformers/all-MiniLM-L6-v2`` (384-dim), the same default used by +pyseekdb. It downloads to a local cache on first use and runs locally afterwards. + +When the network is slow or blocked (e.g. behind a firewall or GFW), +``sentence_transformers`` contacts ``huggingface.co`` to check for updates even +when the model is already cached, causing a 30-60s hang on every server start. +We detect the cache state first — if cached, we load the model with +``local_files_only=True`` and inject it into pyseekdb's model cache so its +internal ``SentenceTransformer`` call reuses it. If not cached, we attempt a +download with a timeout and raise a clear error with manual instructions. + +Override via the ``embedder`` section of :class:`~powermem.configs.MemoryConfig` +to switch to a production-grade provider (OpenAI, Qwen, SiliconFlow, etc.). 
+""" + +from __future__ import annotations + +import logging +import threading +from typing import List, Literal, Optional + +from powermem.integrations.embeddings.base import EmbeddingBase +from powermem.integrations.embeddings.config.base import BaseEmbedderConfig + +logger = logging.getLogger(__name__) + +logging.getLogger("onnxruntime").setLevel(logging.WARNING) +logging.getLogger("huggingface_hub").setLevel(logging.WARNING) + + +# Match pyseekdb's DefaultEmbeddingFunction so the two systems agree on the +# default model and dimension. Keeping this constant local avoids importing +# pyseekdb at module import time. +DEFAULT_MODEL_NAME = "all-MiniLM-L6-v2" +DEFAULT_MODEL_REPO_ID = "sentence-transformers/all-MiniLM-L6-v2" +DEFAULT_EMBEDDING_DIMS = 384 + +# Timeout (seconds) for downloading the embedding model. When the network +# is blocked the huggingface_hub retry loop can take 2-3 minutes; this +# caps it so the server fails fast with a helpful error. +_MODEL_DOWNLOAD_TIMEOUT_S = 30 + + +def _is_model_cached(model_name: str) -> bool: + """Check if a HuggingFace model is already in the local cache.""" + try: + from huggingface_hub import try_to_load_from_cache + + result = try_to_load_from_cache(model_name, "config.json") + return result is not None # None = not cached + except Exception: + return False + + +def _load_sentence_transformer_with_fallback(model_name: str, repo_id: str): + """Load a SentenceTransformer with cache-first logic and download timeout. + + 1. **Cache hit** → load from cache and monkey-patch downstream so pyseekdb's + internal ``SentenceTransformer`` call re-uses the already-loaded instance + instead of triggering its own (network-stuck) load. + 2. **Cache miss** → attempt a real download in a background thread with a + timeout. If the download succeeds, use it. If it times out or fails, + raise a clear error with manual download instructions. + + ``sentence_transformers`` is an optional dependency (the ``extras`` group). 
+ When it is not installed we skip this pre-warm step entirely and let + pyseekdb's ``DefaultEmbeddingFunction`` load the model itself via + ``onnxruntime`` — the embedder still works, it just misses the cache-first + optimization that avoids a possible huggingface.co hang on slow networks. + + Args: + model_name: short name passed to SentenceTransformer (e.g. "all-MiniLM-L6-v2") + repo_id: full HuggingFace repo ID (e.g. "sentence-transformers/all-MiniLM-L6-v2") + """ + try: + from sentence_transformers import SentenceTransformer + except ImportError: + logger.debug( + "sentence_transformers not installed; skipping model pre-warm " + "(install the 'extras' group to enable the cache-first optimization)" + ) + return None + + if _is_model_cached(repo_id): + model = SentenceTransformer(model_name, local_files_only=True) + _patch_sentence_transformer_cache(model_name, model) + logger.info("Loaded %s from local cache", model_name) + return model + + # Cache miss: attempt download with timeout. + logger.debug( + "Model %s not in cache, attempting download (timeout %ss)…", + model_name, + _MODEL_DOWNLOAD_TIMEOUT_S, + ) + + result: list = [None] + error: list = [None] + + def _load(): + try: + result[0] = SentenceTransformer(model_name) + except Exception as exc: + error[0] = exc + + thread = threading.Thread(target=_load, daemon=True) + thread.start() + thread.join(timeout=_MODEL_DOWNLOAD_TIMEOUT_S) + + if result[0] is not None: + _patch_sentence_transformer_cache(model_name, result[0]) + logger.info("Downloaded and loaded %s", model_name) + return result[0] + + if error[0] is not None: + raise RuntimeError( + f"Failed to download embedding model '{model_name}': " + f"{error[0]}. " + f"Download it manually: " + f'python -c "from modelscope import snapshot_download; ' + f"snapshot_download('AI-ModelScope/all-MiniLM-L6-v2')\"" + ) from error[0] + + raise RuntimeError( + f"Downloading embedding model '{model_name}' timed out after " + f"{_MODEL_DOWNLOAD_TIMEOUT_S}s. 
The model is not cached and the " + f"network is unreachable. " + f"Download it manually: " + f'python -c "from modelscope import snapshot_download; ' + f"snapshot_download('AI-ModelScope/all-MiniLM-L6-v2')\"" + ) + + +def _patch_sentence_transformer_cache(model_name: str, model): + """Inject a pre-loaded model into pyseekdb's internal cache. + + pyseekdb's ``SentenceTransformerEmbeddingFunction`` keeps a class-level + ``models`` dict. If the model is already present, it skips the expensive + ``SentenceTransformer(model_name)`` call — which would otherwise try to + reach ``huggingface.co`` even with a cached model. + """ + try: + from pyseekdb.utils.embedding_functions.sentence_transformer_embedding_function import ( + SentenceTransformerEmbeddingFunction, + ) + + SentenceTransformerEmbeddingFunction.models[model_name] = model + logger.debug("Patched pyseekdb SentenceTransformer cache for %s", model_name) + except ImportError: + logger.debug("Could not patch pyseekdb cache (module not available)") + + +class PyseekdbDefaultEmbedding(EmbeddingBase): + """Zero-config local embedder backed by pyseekdb's DefaultEmbeddingFunction.""" + + def __init__(self, config: Optional[BaseEmbedderConfig] = None): + super().__init__(config) + + try: + from pyseekdb.client.embedding_function import DefaultEmbeddingFunction + except ImportError as exc: # pragma: no cover - pyseekdb is a hard dep + raise ImportError( + "pyseekdb is required for the built-in default embedder. " + "Install it with `pip install pyseekdb`." + ) from exc + + # Pre-load the model with cache-first fallback. This ensures the + # model is available BEFORE pyseekdb's DefaultEmbeddingFunction() + # creates its own SentenceTransformer (which would otherwise hang + # on a blocked network). The loaded instance is injected into + # pyseekdb's model cache so its internal call reuses it. 
+ _load_sentence_transformer_with_fallback( + DEFAULT_MODEL_NAME, + DEFAULT_MODEL_REPO_ID, + ) + + self._fn = DefaultEmbeddingFunction() + self.config.model = self.config.model or DEFAULT_MODEL_NAME + self.config.embedding_dims = ( + self.config.embedding_dims or DEFAULT_EMBEDDING_DIMS + ) + + logger.info( + "PyseekdbDefaultEmbedding ready (model=%s, dims=%s)", + self.config.model, + self.config.embedding_dims, + ) + + def embed( + self, + text, + memory_action: Optional[Literal["add", "search", "update"]] = None, + ): + """Return a single embedding vector for ``text``.""" + del memory_action # unused: default embedder treats all actions identically + if text is None: + raise ValueError("text must not be None") + embeddings = self._fn([text] if isinstance(text, str) else list(text)) + if not embeddings: + raise RuntimeError("default embedder returned no vectors") + return list(embeddings[0]) + + def embed_batch( + self, + texts: List[str], + memory_action: Optional[Literal["add", "search", "update"]] = None, + ) -> List[List[float]]: + """Batch embedding using the underlying ONNX model directly.""" + del memory_action # unused: default embedder treats all actions identically + if not texts: + return [] + return [list(vec) for vec in self._fn(list(texts))] diff --git a/src/powermem/storage/config/base.py b/src/powermem/storage/config/base.py index f6117afa..55c31b93 100644 --- a/src/powermem/storage/config/base.py +++ b/src/powermem/storage/config/base.py @@ -108,7 +108,7 @@ class BaseGraphStoreConfig(BaseVectorStoreConfig): "GRAPH_STORE_HOST", # Priority 1 "OCEANBASE_HOST", # Priority 2 (fallback) ), - description="Database server host (empty means embedded SeekDB mode)" + description="Database server host (empty means embedded seekdb mode)" ) ob_path: str = Field( @@ -118,7 +118,7 @@ class BaseGraphStoreConfig(BaseVectorStoreConfig): "GRAPH_STORE_PATH", "OCEANBASE_PATH", ), - description="Path for embedded SeekDB data directory (used when host is empty)" + 
description="Path for embedded seekdb data directory (used when host is empty)" ) port: str = Field( diff --git a/src/powermem/storage/config/oceanbase.py b/src/powermem/storage/config/oceanbase.py index 28dfa8df..690db4d7 100644 --- a/src/powermem/storage/config/oceanbase.py +++ b/src/powermem/storage/config/oceanbase.py @@ -35,7 +35,7 @@ class OceanBaseConfig(BaseVectorStoreConfig): "host", "OCEANBASE_HOST", ), - description="OceanBase server host (empty means embedded SeekDB mode)" + description="OceanBase server host (empty means embedded seekdb mode)" ) ob_path: str = Field( @@ -44,7 +44,7 @@ class OceanBaseConfig(BaseVectorStoreConfig): "ob_path", "OCEANBASE_PATH", ), - description="Path for embedded SeekDB data directory (used when host is empty)" + description="Path for embedded seekdb data directory (used when host is empty)" ) port: str = Field( diff --git a/src/powermem/storage/oceanbase/oceanbase.py b/src/powermem/storage/oceanbase/oceanbase.py index fbeed448..92b80fd4 100644 --- a/src/powermem/storage/oceanbase/oceanbase.py +++ b/src/powermem/storage/oceanbase/oceanbase.py @@ -418,14 +418,14 @@ def _create_col(self): "Please configure embedding_model_dims in your OceanBaseConfig." ) - # Embedded SeekDB does not tolerate IVF-family indexes on small datasets: + # Embedded seekdb does not tolerate IVF-family indexes on small datasets: # IVF requires at least nlist training vectors; fewer vectors causes a native # SIGSEGV that cannot be caught by Python. Switch to HNSW automatically. is_embedded = not self.connection_args.get("host") if is_embedded and self.index_type in constants.INDEX_TYPE_IVF: nlist = (self.vidx_algo_params or {}).get("nlist", constants.DEFAULT_OCEANBASE_IVF_BUILD_PARAM.get("nlist", 128)) logger.warning( - "Embedded SeekDB: index_type '%s' (nlist=%d) requires at least %d vectors " + "Embedded seekdb: index_type '%s' (nlist=%d) requires at least %d vectors " "and may crash on small datasets. 
Auto-switching to HNSW.", self.index_type, nlist, nlist, ) @@ -649,10 +649,10 @@ def _row_to_model(self, row): record = self.model_class() # Support both SQLAlchemy Row objects and plain dicts (used when rows - # are materialised early to avoid embedded SeekDB cursor crashes). + # are materialised early to avoid embedded seekdb cursor crashes). mapping = row._mapping if hasattr(row, '_mapping') else row - # Build a normalized lookup: strip table-name prefix that embedded SeekDB + # Build a normalized lookup: strip table-name prefix that embedded seekdb # may add (e.g. "memories.document" → "document") so we can always find # the value regardless of whether the driver returns bare or prefixed keys. normalized: Dict[str, any] = {} @@ -1028,7 +1028,7 @@ def _fulltext_search(self, query: str, limit: int = 5, filters: Optional[Dict] = # Execute the query with parameters - use direct parameter passing # Materialize rows to dicts inside the connection context to avoid - # "pure virtual method called" crash in embedded SeekDB (the C++ + # "pure virtual method called" crash in embedded seekdb (the C++ # cursor is invalidated once the transaction/connection closes). with self.obvector.engine.connect() as conn: with conn.begin(): @@ -1148,7 +1148,7 @@ def _sparse_search(self, sparse_embedding: Dict[int, float], limit: int = 5, fil # Execute the query # Materialize rows to dicts inside the connection context to avoid - # "pure virtual method called" crash in embedded SeekDB. + # "pure virtual method called" crash in embedded seekdb. 
with self.obvector.engine.connect() as conn: with conn.begin(): logger.debug(f"Executing sparse vector search query with sparse_vector: {sparse_vector_str}") @@ -1388,7 +1388,7 @@ def _hybrid_search(self, query: str, vectors: List[List[float]], limit: int = 5, is_embedded = not self.connection_args.get("host") if is_embedded: - # SeekDB embedded engine does not support concurrent SQL across threads + # seekdb embedded engine does not support concurrent SQL across threads try: vector_results = self._vector_search(query, vectors, candidate_limit, filters) except Exception as e: @@ -1911,7 +1911,7 @@ def _get_records_by_id(self, vector_id, output_columns: List[str]) -> list: """Fetch rows by primary key while keeping the connection open during fetchall. pyobvector.get() returns the cursor *after* committing the transaction via - ``with conn.begin()``. In embedded SeekDB the commit invalidates the cursor, + ``with conn.begin()``. In embedded seekdb the commit invalidates the cursor, so calling fetchall() on it afterwards triggers a C++ ``pure virtual method called`` crash. This helper avoids that by running fetchall() inside the ``with engine.connect()`` block. diff --git a/src/powermem/user_memory/storage/user_profile.py b/src/powermem/user_memory/storage/user_profile.py index f3501a1d..a2536477 100644 --- a/src/powermem/user_memory/storage/user_profile.py +++ b/src/powermem/user_memory/storage/user_profile.py @@ -51,12 +51,12 @@ def __init__( Args: table_name (str): Name of the table to store user profiles. connection_args (Optional[Dict[str, Any]]): Connection parameters for OceanBase. - host (Optional[str]): OceanBase server host (empty means embedded SeekDB mode). + host (Optional[str]): OceanBase server host (empty means embedded seekdb mode). port (Optional[str]): OceanBase server port. user (Optional[str]): OceanBase username. password (Optional[str]): OceanBase password. db_name (Optional[str]): OceanBase database name. 
- ob_path (Optional[str]): Path for embedded SeekDB data directory. + ob_path (Optional[str]): Path for embedded seekdb data directory. """ self.table_name = table_name self.primary_field = "id" diff --git a/src/powermem/utils/oceanbase_util.py b/src/powermem/utils/oceanbase_util.py index df4a2e07..3877b6b9 100644 --- a/src/powermem/utils/oceanbase_util.py +++ b/src/powermem/utils/oceanbase_util.py @@ -787,14 +787,14 @@ def parse_native_hybrid_results( @staticmethod def safe_fetchall(result): - """Safely fetch all rows, returning empty list when SeekDB embedded returns no-row result for empty tables.""" + """Safely fetch all rows, returning empty list when seekdb embedded returns no-row result for empty tables.""" if not getattr(result, 'returns_rows', True): return [] return result.fetchall() @staticmethod def safe_fetchone(result): - """Safely fetch one row, returning None when SeekDB embedded returns no-row result for empty tables.""" + """Safely fetch one row, returning None when seekdb embedded returns no-row result for empty tables.""" if not getattr(result, 'returns_rows', True): return None return result.fetchone() @@ -802,13 +802,13 @@ def safe_fetchone(result): @staticmethod def ensure_embedded_database_exists(ob_path: str, db_name: str) -> None: """ - For embedded SeekDB mode only: ensure the target database exists, creating it if necessary. + For embedded seekdb mode only: ensure the target database exists, creating it if necessary. Connects to the default 'test' database first, then executes CREATE DATABASE IF NOT EXISTS for the target database. Args: - ob_path: Path for embedded SeekDB data directory. + ob_path: Path for embedded seekdb data directory. db_name: Target database name to ensure exists. 
""" if not db_name or db_name == "test": diff --git a/src/powermem/utils/utils.py b/src/powermem/utils/utils.py index 28ffa024..dd6ba5ec 100644 --- a/src/powermem/utils/utils.py +++ b/src/powermem/utils/utils.py @@ -4,26 +4,28 @@ This module provides utility functions and helper classes. """ -import os import hashlib import json import logging +import os import re -import time import threading +import time from datetime import datetime, timezone -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) # Try to import zoneinfo (Python 3.9+) try: from zoneinfo import ZoneInfo + _HAS_ZONEINFO = True except ImportError: _HAS_ZONEINFO = False try: import pytz + _HAS_PYTZ = True except ImportError: _HAS_PYTZ = False @@ -34,18 +36,84 @@ _timezone_lock = threading.Lock() +def _is_valid_timezone(name: str) -> bool: + """Return True if ``name`` is a recognized IANA timezone.""" + if not name: + return False + try: + if _HAS_ZONEINFO: + ZoneInfo(name) + return True + if globals().get("_HAS_PYTZ"): + pytz.timezone(name) + return True + except Exception: + return False + return False + + +def detect_system_timezone() -> str: + """Best-effort IANA timezone name of the host machine. + + Resolution order: ``tzlocal`` (if installed) -> ``/etc/timezone`` -> + ``/etc/localtime`` symlink -> ``TZ`` env var. Returns ``"UTC"`` when the + host zone cannot be determined or is not a recognized IANA name. This is + only used as a fallback: an explicit ``TIMEZONE`` always takes precedence. + """ + candidates: List[str] = [] + + # 1. tzlocal — most robust, cross-platform (optional dependency) + try: + from tzlocal import get_localzone_name + + name = get_localzone_name() + if name: + candidates.append(name) + except Exception: + pass + + # 2. 
/etc/timezone — Debian/Ubuntu and most Linux distros + try: + with open("/etc/timezone", "r", encoding="utf-8") as fh: + candidates.append(fh.read().strip()) + except OSError: + pass + + # 3. /etc/localtime symlink -> .../zoneinfo/Area/Location (e.g. Europe/Paris) + try: + link = os.readlink("/etc/localtime") + marker = "zoneinfo/" + if marker in link: + candidates.append(link.split(marker, 1)[1]) + except OSError: + pass + + # 4. TZ environment variable (only accepted if a real zone name) + tz_env = os.environ.get("TZ") + if tz_env: + candidates.append(tz_env.strip()) + + for candidate in candidates: + if _is_valid_timezone(candidate): + logger.debug(f"Detected system timezone: {candidate}") + return candidate + + logger.debug("Could not detect system timezone; falling back to UTC") + return "UTC" + + def set_timezone(timezone_str: Any) -> None: """ Set the timezone from configuration. - + This function should be called during Memory initialization if timezone is specified in the config. It takes precedence over environment variables. - + Args: timezone_str: Timezone string (e.g., 'Asia/Shanghai', 'UTC') """ global _timezone_cache, _timezone_str - + with _timezone_lock: tz = timezone_str if isinstance(tz, dict): @@ -63,11 +131,11 @@ def set_timezone(timezone_str: Any) -> None: def get_timezone() -> Any: """ Get the configured timezone from config or environment variable. - + This function first checks if timezone was set via set_timezone() (from config), then falls back to TIMEZONE environment variable. The timezone is cached after first access for performance. - + Configuration: Timezone can be configured in two ways: 1. 
Via config dict/JSON: Set 'timezone' in your config, which will be @@ -77,37 +145,38 @@ def get_timezone() -> Any: - TIMEZONE=America/New_York (for US Eastern Time) - TIMEZONE=Europe/London (for UK Time) - TIMEZONE=UTC (default, if not specified) - + Common timezone names: - Asia/Shanghai, Asia/Tokyo, Asia/Hong_Kong - America/New_York, America/Los_Angeles, America/Chicago - Europe/London, Europe/Paris, Europe/Berlin - UTC (Coordinated Universal Time) - + Returns: - Timezone object (ZoneInfo or pytz timezone) for the configured timezone, + Timezone object (ZoneInfo or pytz timezone) for the configured timezone, or UTC if not configured or invalid timezone specified - + Note: The timezone is cached globally. To reset the cache (e.g., after changing the timezone), call reset_timezone_cache(). """ - global _timezone_cache, _timezone_str - + global _timezone_cache + if _timezone_cache is not None: return _timezone_cache - + with _timezone_lock: if _timezone_cache is not None: return _timezone_cache - - # Priority: config > environment variable > default + + # Priority: config > TIMEZONE env var > detected host zone > UTC if _timezone_str is not None: timezone_str = _timezone_str else: - # Fallback to environment variable (for backward compatibility) - timezone_str = os.getenv('TIMEZONE', 'UTC') - + # No config: fall back to the TIMEZONE env var, then auto-detect + # the host timezone (detect_system_timezone returns UTC on failure). 
+ timezone_str = os.getenv("TIMEZONE") or detect_system_timezone() + try: if _HAS_ZONEINFO: _timezone_cache = ZoneInfo(timezone_str) @@ -117,44 +186,46 @@ def get_timezone() -> Any: logger.warning("No timezone library available, using UTC") _timezone_cache = timezone.utc except Exception as e: - logger.warning(f"Invalid timezone '{timezone_str}', falling back to UTC: {e}") + logger.warning( + f"Invalid timezone '{timezone_str}', falling back to UTC: {e}" + ) try: if _HAS_ZONEINFO: - _timezone_cache = ZoneInfo('UTC') + _timezone_cache = ZoneInfo("UTC") elif _HAS_PYTZ: _timezone_cache = pytz.UTC else: _timezone_cache = timezone.utc except Exception: _timezone_cache = timezone.utc - + return _timezone_cache def get_current_datetime() -> datetime: """ Get current datetime in the configured timezone. - + This function is used throughout powermem to get the current time in the configured timezone. It replaces datetime.utcnow() to support timezone configuration via the TIMEZONE environment variable. - + This function respects the TIMEZONE environment variable set in .env file. If TIMEZONE is not set, it defaults to UTC. - + Returns: datetime object in the configured timezone (timezone-aware) - + Example: # In .env file: # TIMEZONE=Asia/Shanghai - + from powermem.utils.utils import get_current_datetime now = get_current_datetime() # Returns datetime in Asia/Shanghai timezone - + # The returned datetime is timezone-aware: # datetime.datetime(2025, 1, 15, 14, 30, 0, tzinfo=zoneinfo.ZoneInfo(key='Asia/Shanghai')) - + Note: All timestamps in powermem (created_at, updated_at, etc.) are generated using this function to ensure consistency with the configured timezone. @@ -177,11 +248,11 @@ def reset_timezone_cache(): def generate_memory_id(content: str, user_id: Optional[str] = None) -> str: """ Generate a unique memory ID based on content and user. 
- + Args: content: Memory content user_id: User ID - + Returns: Unique memory ID """ @@ -192,159 +263,186 @@ def generate_memory_id(content: str, user_id: Optional[str] = None) -> str: def validate_memory_data(data: Dict[str, Any]) -> bool: """ Validate memory data structure. - + Args: data: Memory data to validate - + Returns: True if valid, False otherwise """ required_fields = ["content"] - + for field in required_fields: if field not in data: logger.error(f"Missing required field: {field}") return False - + if not isinstance(data["content"], str) or not data["content"].strip(): logger.error("Content must be a non-empty string") return False - + return True def sanitize_content(content: str) -> str: """ Sanitize memory content. - + Args: content: Content to sanitize - + Returns: Sanitized content """ # Remove excessive whitespace content = " ".join(content.split()) - + # Remove control characters content = "".join(char for char in content if ord(char) >= 32 or char in "\n\t") - + return content.strip() def format_memory_for_display(memory: Dict[str, Any]) -> str: """ Format memory for display. - + Args: memory: Memory data - + Returns: Formatted memory string """ content = memory.get("content", "") created_at = memory.get("created_at", "") metadata = memory.get("metadata", {}) - + formatted = f"Content: {content}\n" if created_at: formatted += f"Created: {created_at}\n" if metadata: formatted += f"Metadata: {json.dumps(metadata, indent=2, ensure_ascii=False)}\n" - + return formatted def merge_memories(memories: List[Dict[str, Any]]) -> str: """ Merge multiple memories into a single string. - + Args: memories: List of memory data - + Returns: Merged memory content """ if not memories: return "" - + merged_content = [] for memory in memories: content = memory.get("content", "") if content: merged_content.append(content) - + return "\n\n".join(merged_content) def calculate_similarity(text1: str, text2: str) -> float: """ Calculate similarity between two texts. 
- + Args: text1: First text text2: Second text - + Returns: Similarity score between 0 and 1 """ # Simple word-based similarity words1 = set(text1.lower().split()) words2 = set(text2.lower().split()) - + if not words1 and not words2: return 1.0 - + if not words1 or not words2: return 0.0 - + intersection = words1.intersection(words2) union = words1.union(words2) - + return len(intersection) / len(union) def extract_keywords(text: str, max_keywords: int = 10) -> List[str]: """ Extract keywords from text. - + Args: text: Text to extract keywords from max_keywords: Maximum number of keywords - + Returns: List of keywords """ # Simple keyword extraction words = text.lower().split() - + # Remove common stop words stop_words = { - "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", - "of", "with", "by", "is", "are", "was", "were", "be", "been", "have", - "has", "had", "do", "does", "did", "will", "would", "could", "should" + "the", + "a", + "an", + "and", + "or", + "but", + "in", + "on", + "at", + "to", + "for", + "of", + "with", + "by", + "is", + "are", + "was", + "were", + "be", + "been", + "have", + "has", + "had", + "do", + "does", + "did", + "will", + "would", + "could", + "should", } - + keywords = [word for word in words if word not in stop_words and len(word) > 2] - + # Count frequency word_count = {} for word in keywords: word_count[word] = word_count.get(word, 0) + 1 - + # Sort by frequency sorted_keywords = sorted(word_count.items(), key=lambda x: x[1], reverse=True) - + return [word for word, count in sorted_keywords[:max_keywords]] def format_timestamp(timestamp: datetime) -> str: """ Format timestamp for display. 
- + Args: timestamp: Timestamp to format - + Returns: Formatted timestamp string with timezone information """ @@ -352,7 +450,7 @@ def format_timestamp(timestamp: datetime) -> str: if timestamp.tzinfo is None: tz = get_timezone() timestamp = timestamp.replace(tzinfo=tz) - + # Format with timezone name timezone_name = timestamp.tzinfo.tzname(timestamp) if timestamp.tzinfo else "UTC" return timestamp.strftime(f"%Y-%m-%d %H:%M:%S {timezone_name}") @@ -361,10 +459,10 @@ def format_timestamp(timestamp: datetime) -> str: def parse_timestamp(timestamp_str: str) -> Optional[datetime]: """ Parse timestamp string. - + Args: timestamp_str: Timestamp string to parse - + Returns: Parsed datetime object or None if invalid """ @@ -412,19 +510,19 @@ def extract_json(text): def parse_json_from_text(text: str, expected_type: type = dict) -> Optional[Any]: """ Parse JSON from text, with fallback to extract JSON if wrapped in text. - + This function first tries to parse the text directly as JSON. If that fails, it attempts to extract JSON objects from the text using regex pattern matching. - + Args: text: Text that may contain JSON expected_type: Expected type of the parsed JSON (default: dict). If the parsed JSON is not of this type, returns None. - + Returns: Parsed JSON object of the expected type, or None if parsing fails or the parsed object is not of the expected type. - + Examples: >>> parse_json_from_text('{"key": "value"}') {'key': 'value'} @@ -438,14 +536,20 @@ def parse_json_from_text(text: str, expected_type: type = dict) -> Optional[Any] parsed = json.loads(text) if isinstance(parsed, expected_type): return parsed - logger.warning(f"Parsed JSON is not of expected type {expected_type}, got: {type(parsed)}") + logger.warning( + f"Parsed JSON is not of expected type {expected_type}, got: {type(parsed)}" + ) return None except json.JSONDecodeError: pass - + # Try to extract JSON from text if it's wrapped # Match JSON objects: { ... } or arrays: [ ... 
] - pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}' if expected_type == dict else r'\[[^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*\]' + pattern = ( + r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}" + if expected_type == dict + else r"\[[^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*\]" + ) json_match = re.search(pattern, text, re.DOTALL) if json_match: try: @@ -455,27 +559,29 @@ def parse_json_from_text(text: str, expected_type: type = dict) -> Optional[Any] return parsed except json.JSONDecodeError: pass - - logger.error(f"Failed to parse JSON from text, expected type: {expected_type}, text: {text}") + + logger.error( + f"Failed to parse JSON from text, expected type: {expected_type}, text: {text}" + ) return None def parse_conversation_text(messages: Any) -> str: """ Parse conversation messages into text format. - + This function handles multiple input formats: - String: Returns as-is - Dict: Extracts 'content' field - List of dicts: Formats as "role: content\n" for each message (skips system messages) - Other types: Converts to string - + Args: messages: Conversation messages (str, dict, or list[dict]) - + Returns: Conversation text string - + Examples: >>> parse_conversation_text("Hello") 'Hello' @@ -491,9 +597,9 @@ def parse_conversation_text(messages: Any) -> str: elif isinstance(messages, list): conversation_text = "" for msg in messages: - if isinstance(msg, dict) and 'role' in msg and 'content' in msg: - role = msg['role'] - content = msg.get('content', '') + if isinstance(msg, dict) and "role" in msg and "content" in msg: + role = msg["role"] + content = msg.get("content", "") if role != "system": # Skip system messages conversation_text += f"{role}: {content}\n" return conversation_text @@ -507,11 +613,14 @@ def format_entities(entities): formatted_lines = [] for entity in entities: - simplified = f"{entity['source']} -- {entity['relationship']} -- {entity['destination']}" + simplified = ( + f"{entity['source']} -- {entity['relationship']} -- {entity['destination']}" + ) 
formatted_lines.append(simplified) return "\n".join(formatted_lines) + def remove_code_blocks(content: str) -> str: """ Removes enclosing code block markers ```[language] and ``` from a given string. @@ -526,7 +635,9 @@ def remove_code_blocks(content: str) -> str: return match.group(1).strip() if match else content.strip() -def llm_json_text_with_fallback(llm: Any, *, messages: List[Dict[str, str]], **kwargs: Any) -> str: +def llm_json_text_with_fallback( + llm: Any, *, messages: List[Dict[str, str]], **kwargs: Any +) -> str: """ Call llm.generate_response with OpenAI-style json_object mode; if the body is empty, retry without response_format. Some OpenAI-compatible APIs return blank message.content when @@ -661,7 +772,10 @@ def get_image_description(image_obj: Any, llm: Any, vision_details: Any) -> str: "type": "text", "text": "A user is providing an image. Provide a high level description of the image and do not include any additional text.", }, - {"type": "image_url", "image_url": {"url": image_obj, "detail": detail}}, + { + "type": "image_url", + "image_url": {"url": image_obj, "detail": detail}, + }, ], }, ] @@ -671,29 +785,31 @@ def get_image_description(image_obj: Any, llm: Any, vision_details: Any) -> str: return llm.generate_response(messages=messages) -def _process_content_item(item: Dict[str, Any], role: str, llm: Any, vision_details: Any, audio_llm: Any) -> Optional[str]: +def _process_content_item( + item: Dict[str, Any], role: str, llm: Any, vision_details: Any, audio_llm: Any +) -> Optional[str]: """ Process a single content item and return processed text content. 
- + Args: item: Content item dict role: Message role llm: LLM instance for image description vision_details: Vision details setting audio_llm: Audio LLM instance for transcription - + Returns: Processed text content or None if item should be skipped """ if not isinstance(item, dict): return None - + item_type = item.get("type") - + if item_type == "text": text_content = item.get("text", "") return text_content if text_content else None - + elif item_type == "image_url": image_url = item.get("image_url", {}).get("url") if image_url: @@ -703,11 +819,13 @@ def _process_content_item(item: Dict[str, Any], role: str, llm: Any, vision_deta except Exception as e: raise Exception(f"Error while processing image {image_url}: {e}") return None - + elif item_type == "audio": if audio_llm is not None: audio_content = item.get("content", {}) - audio_url = audio_content.get("audio") if isinstance(audio_content, dict) else None + audio_url = ( + audio_content.get("audio") if isinstance(audio_content, dict) else None + ) if audio_url: try: transcribed_text = audio_llm.transcribe(audio_url=audio_url) @@ -718,13 +836,18 @@ def _process_content_item(item: Dict[str, Any], role: str, llm: Any, vision_deta else: logger.warning(f"Audio item found but audio_llm is not configured: {item}") return None - + else: logger.warning(f"Unknown content type: {item_type}") return None -def parse_vision_messages(messages: List[Dict[str, Any]], llm: Any = None, vision_details: Any = "auto", audio_llm: Any = None) -> List[Dict[str, Any]]: +def parse_vision_messages( + messages: List[Dict[str, Any]], + llm: Any = None, + vision_details: Any = "auto", + audio_llm: Any = None, +) -> List[Dict[str, Any]]: """ Assumes input is already a list of message dicts with 'role' and 'content' fields. 
@@ -751,7 +874,7 @@ def parse_vision_messages(messages: List[Dict[str, Any]], llm: Any = None, visio content = msg["content"] role = msg["role"] - + # Normalize content to list format for unified processing items_to_process = [] if isinstance(content, list): @@ -768,10 +891,12 @@ def parse_vision_messages(messages: List[Dict[str, Any]], llm: Any = None, visio # Regular text or other content, passthrough returned_messages.append(msg) continue - + # Process each item for item in items_to_process: - processed_content = _process_content_item(item, role, llm, vision_details, audio_llm) + processed_content = _process_content_item( + item, role, llm, vision_details, audio_llm + ) if processed_content: returned_messages.append({"role": role, "content": processed_content}) @@ -781,19 +906,20 @@ def parse_vision_messages(messages: List[Dict[str, Any]], llm: Any = None, visio def load_config_from_env() -> Dict[str, Any]: """ Load configuration from environment variables. - + .. deprecated:: 0.1.0 This function is now in :mod:`mem.config_loader`. Please use ``from powermem import load_config_from_env`` instead. - + This is kept for backward compatibility. For the actual implementation, see :mod:`mem.config_loader`. - + Returns: Configuration dictionary built from environment variables """ # Import here to avoid circular import from ..config_loader import load_config_from_env as _load_config_from_env + return _load_config_from_env() @@ -801,10 +927,10 @@ def serialize_datetime(value: Any) -> Any: """ Convert datetime objects to ISO format strings for JSON serialization. Recursively handles dictionaries and lists. 
- + Args: value: Value to serialize (can be datetime, dict, list, or primitive) - + Returns: Serialized value with datetime objects converted to ISO format strings """ @@ -831,7 +957,7 @@ def convert_config_object_to_dict(obj: Any) -> Any: return None # Handle ConfigObject - if hasattr(obj, 'to_dict'): + if hasattr(obj, "to_dict"): obj = obj.to_dict() # Handle dict @@ -849,84 +975,86 @@ def convert_config_object_to_dict(obj: Any) -> Any: class SnowflakeIDGenerator: """ Snowflake ID generator for distributed systems. - + Generates unique 64-bit IDs using the Snowflake algorithm: - 41 bits for timestamp (milliseconds since epoch) - 10 bits for machine ID (5 bits datacenter + 5 bits worker) - 12 bits for sequence number - + Thread-safe implementation. """ - + # Snowflake parameters EPOCH = 1609459200000 # 2021-01-01 00:00:00 UTC in milliseconds TIMESTAMP_BITS = 41 DATACENTER_BITS = 5 WORKER_BITS = 5 SEQUENCE_BITS = 12 - + MAX_DATACENTER_ID = (1 << DATACENTER_BITS) - 1 # 31 MAX_WORKER_ID = (1 << WORKER_BITS) - 1 # 31 MAX_SEQUENCE = (1 << SEQUENCE_BITS) - 1 # 4095 - + # Bit shifts TIMESTAMP_SHIFT = SEQUENCE_BITS + WORKER_BITS + DATACENTER_BITS DATACENTER_SHIFT = SEQUENCE_BITS + WORKER_BITS WORKER_SHIFT = SEQUENCE_BITS - + def __init__(self, datacenter_id: int = 0, worker_id: int = 0): """ Initialize Snowflake ID generator. 
- + Args: datacenter_id: Datacenter ID (0-31) worker_id: Worker ID (0-31) - + Raises: ValueError: If datacenter_id or worker_id is out of range """ if datacenter_id < 0 or datacenter_id > self.MAX_DATACENTER_ID: - raise ValueError(f"Datacenter ID must be between 0 and {self.MAX_DATACENTER_ID}") + raise ValueError( + f"Datacenter ID must be between 0 and {self.MAX_DATACENTER_ID}" + ) if worker_id < 0 or worker_id > self.MAX_WORKER_ID: raise ValueError(f"Worker ID must be between 0 and {self.MAX_WORKER_ID}") - + self.datacenter_id = datacenter_id self.worker_id = worker_id self.sequence = 0 self.last_timestamp = -1 self._lock = threading.Lock() - + def _current_timestamp(self) -> int: """Get current timestamp in milliseconds.""" return int(time.time() * 1000) - + def _wait_next_millis(self, last_timestamp: int) -> int: """Wait until next millisecond.""" timestamp = self._current_timestamp() while timestamp <= last_timestamp: timestamp = self._current_timestamp() return timestamp - + def generate(self) -> int: """ Generate a new Snowflake ID. - + Returns: 64-bit integer ID - + Raises: RuntimeError: If clock moves backwards or sequence overflows """ with self._lock: timestamp = self._current_timestamp() - + # Handle clock backwards if timestamp < self.last_timestamp: raise RuntimeError( f"Clock moved backwards. 
Refusing to generate ID for " f"{self.last_timestamp - timestamp} milliseconds" ) - + # Same millisecond, increment sequence if timestamp == self.last_timestamp: self.sequence = (self.sequence + 1) & self.MAX_SEQUENCE @@ -936,24 +1064,24 @@ def generate(self) -> int: else: # New millisecond, reset sequence self.sequence = 0 - + self.last_timestamp = timestamp - + # Generate ID return ( - ((timestamp - self.EPOCH) << self.TIMESTAMP_SHIFT) | - (self.datacenter_id << self.DATACENTER_SHIFT) | - (self.worker_id << self.WORKER_SHIFT) | - self.sequence + ((timestamp - self.EPOCH) << self.TIMESTAMP_SHIFT) + | (self.datacenter_id << self.DATACENTER_SHIFT) + | (self.worker_id << self.WORKER_SHIFT) + | self.sequence ) - + def generate_batch(self, count: int) -> List[int]: """ Generate a batch of Snowflake IDs. - + Args: count: Number of IDs to generate - + Returns: List of 64-bit integer IDs """ @@ -970,7 +1098,7 @@ def generate_batch(self, count: int) -> List[int]: def get_snowflake_generator() -> SnowflakeIDGenerator: """ Get or create the global Snowflake ID generator instance. - + Returns: Snowflake ID generator instance """ @@ -982,8 +1110,7 @@ def get_snowflake_generator() -> SnowflakeIDGenerator: datacenter_id = int(os.getenv("SNOWFLAKE_DATACENTER_ID", "0")) worker_id = int(os.getenv("SNOWFLAKE_WORKER_ID", "0")) _snowflake_generator = SnowflakeIDGenerator( - datacenter_id=datacenter_id, - worker_id=worker_id + datacenter_id=datacenter_id, worker_id=worker_id ) return _snowflake_generator @@ -991,7 +1118,7 @@ def get_snowflake_generator() -> SnowflakeIDGenerator: def generate_snowflake_id() -> int: """ Generate a new Snowflake ID using the global generator. 
- + Returns: 64-bit integer ID """ @@ -1006,4 +1133,4 @@ def strip_think_tags(text: str) -> str: """ if "" not in text.lower(): return text.strip() - return re.sub(r"[\s\S]*?", "", text, flags=re.IGNORECASE).strip() \ No newline at end of file + return re.sub(r"[\s\S]*?", "", text, flags=re.IGNORECASE).strip() diff --git a/src/script/scripts/upgrade_sparse_vector.py b/src/script/scripts/upgrade_sparse_vector.py index a54bf6b0..b5582ccc 100644 --- a/src/script/scripts/upgrade_sparse_vector.py +++ b/src/script/scripts/upgrade_sparse_vector.py @@ -95,11 +95,11 @@ def _validate_and_parse_config(config: Dict[str, Any]) -> Tuple[ObVecClient, str else: if not db_name: raise ValueError( - "Missing required parameter 'db_name' for embedded SeekDB connection." + "Missing required parameter 'db_name' for embedded seekdb connection." ) - logger.info(f"Connecting to embedded SeekDB at {ob_path}...") + logger.info(f"Connecting to embedded seekdb at {ob_path}...") obvector = ObVecClient(path=ob_path, db_name=db_name) - logger.info(f"Connected successfully to embedded SeekDB database '{db_name}'") + logger.info(f"Connected successfully to embedded seekdb database '{db_name}'") return obvector, collection_name except (ValueError, RuntimeError): raise diff --git a/src/server/cli/server.py b/src/server/cli/server.py index 2e286353..30e10c4b 100644 --- a/src/server/cli/server.py +++ b/src/server/cli/server.py @@ -34,7 +34,7 @@ def _is_embedded_storage() -> bool: Returns True for: - SQLite (always embedded, file-based) - - OceanBase/SeekDB in embedded mode (OCEANBASE_HOST is empty) + - OceanBase/seekdb in embedded mode (OCEANBASE_HOST is empty) """ try: # Ensure `.env` is loaded before constructing settings classes that do not @@ -71,7 +71,7 @@ def server(host, port, workers, reload, log_level): Start the PowerMem API server. 
Example: - powermem-server --host 0.0.0.0 --port 8000 --reload + powermem-server --host 0.0.0.0 --port 8848 --reload """ import sys @@ -87,11 +87,11 @@ def server(host, port, workers, reload, log_level): if log_level: config.log_level = log_level - # Embedded databases (SQLite / embedded SeekDB) only support a single process. + # Embedded databases (SQLite / embedded seekdb) only support a single process. # Force workers=1 automatically so users don't have to set it manually. if not config.reload and config.workers != 1 and _is_embedded_storage(): print( - f"[server] Embedded storage detected (SQLite or SeekDB without host). " + f"[server] Embedded storage detected (SQLite or seekdb without host). " f"Forcing workers=1 (was {config.workers}).", file=sys.stderr, ) diff --git a/src/server/config.py b/src/server/config.py index 2f645ca5..aaff4037 100644 --- a/src/server/config.py +++ b/src/server/config.py @@ -3,6 +3,7 @@ """ from __future__ import annotations + from typing import List, Optional from pydantic import Field, field_validator @@ -41,12 +42,15 @@ class ServerSettings(BaseSettings): # Server settings host: str = Field(default="0.0.0.0") - port: int = Field(default=8000) + port: int = Field(default=8848) workers: int = Field(default=4) reload: bool = Field(default=False) # Authentication settings - auth_enabled: bool = Field(default=True) + # Default follows .env.example.full (AUTH_ENABLED=false): off for local / + # test so PowerMem runs zero-config. Enable it (and set API_KEYS) whenever + # the server is reachable from a network. + auth_enabled: bool = Field(default=False) api_keys: str = Field(default="") # Rate limiting settings @@ -55,6 +59,8 @@ class ServerSettings(BaseSettings): # Logging settings log_level: str = Field(default="INFO") + # Default follows .env.example.full (LOG_FORMAT=json): machine-parseable, + # works with log shippers. Set to "text" for human-readable terminal logs. 
log_format: str = Field(default="json") log_file: Optional[str] = Field(default="server.log") diff --git a/tests/regression/test_api.py b/tests/regression/test_api.py index e9bccdf3..de47c9b8 100644 --- a/tests/regression/test_api.py +++ b/tests/regression/test_api.py @@ -22,7 +22,7 @@ class APITester: """API Test Class""" - def __init__(self, base_url: str = "http://localhost:8000", api_key: str = "key1"): + def __init__(self, base_url: str = "http://localhost:8848", api_key: str = "key1"): """ Initialize tester @@ -2295,8 +2295,8 @@ def main(): import argparse parser = argparse.ArgumentParser(description='powermem API Server Basic Functionality Test') - parser.add_argument('--url', type=str, default='http://localhost:8000', - help='API server base URL (default: http://localhost:8000)') + parser.add_argument('--url', type=str, default='http://localhost:8848', + help='API server base URL (default: http://localhost:8848)') parser.add_argument('--api-key', type=str, default='key1', help='API key (default: key1)') parser.add_argument('--output', type=str, default='results.json', diff --git a/tests/regression/test_dashboard.py b/tests/regression/test_dashboard.py index 309b2632..0dc9649c 100644 --- a/tests/regression/test_dashboard.py +++ b/tests/regression/test_dashboard.py @@ -198,8 +198,8 @@ def pytest_configure(config): # ==================== Configuration ==================== ENV_FILE = os.path.join(_REPO_ROOT, ".env") -DASHBOARD_URL = "http://localhost:8000/dashboard/" -API_BASE_URL = "http://localhost:8000/api/v1" +DASHBOARD_URL = "http://localhost:8848/dashboard/" +API_BASE_URL = "http://localhost:8848/api/v1" SERVER_STARTUP_TIMEOUT = 30 # seconds PAGE_LOAD_TIMEOUT = 10000 # milliseconds @@ -391,7 +391,7 @@ def _dashboard_e2e_preflight( strict_net = _env_truthy("CI") net_ms = int(os.environ.get("POWERMEM_DASHBOARD_PREFLIGHT_NETWORKIDLE_MS", "30000")) - _wait_for_port("127.0.0.1", 8000, total_s=60, label="local TCP") + _wait_for_port("127.0.0.1", 8848, 
total_s=60, label="local TCP") try: st, body = _http_check("GET", health_url, timeout=15) @@ -489,18 +489,18 @@ def server_process(): # Check if server is already running import socket sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - result = sock.connect_ex(('localhost', 8000)) + result = sock.connect_ex(('localhost', 8848)) sock.close() if result == 0: - print("\n[INFO] Server already running on port 8000") + print("\n[INFO] Server already running on port 8848") yield None return # Start server print("\n[SETUP] Starting PowerMem API server...") process = subprocess.Popen( - [sys.executable, "-m", "src.server.cli.server", "--host", "0.0.0.0", "--port", "8000"], + [sys.executable, "-m", "src.server.cli.server", "--host", "0.0.0.0", "--port", "8848"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, @@ -511,7 +511,7 @@ def server_process(): start_time = time.time() while time.time() - start_time < SERVER_STARTUP_TIMEOUT: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - result = sock.connect_ex(('localhost', 8000)) + result = sock.connect_ex(('localhost', 8848)) sock.close() if result == 0: print("[SETUP] Server started successfully") diff --git a/tests/regression/test_native_language.py b/tests/regression/test_native_language.py index 16e40817..bcf6629f 100644 --- a/tests/regression/test_native_language.py +++ b/tests/regression/test_native_language.py @@ -78,7 +78,7 @@ def user_memory(config): @pytest.fixture(scope="module") def api_client(): """Provide API client for HTTP tests.""" - base_url = os.getenv("POWERMEM_API_URL", "http://localhost:8000") + base_url = os.getenv("POWERMEM_API_URL", "http://localhost:8848") api_key = os.getenv("POWERMEM_API_KEY", "key1") return APIClient(base_url=base_url, api_key=api_key) @@ -86,7 +86,7 @@ def api_client(): class APIClient: """Simple API client for testing HTTP endpoints.""" - def __init__(self, base_url: str = "http://localhost:8000", api_key: str = "key1"): + def __init__(self, base_url: 
str = "http://localhost:8848", api_key: str = "key1"): self.base_url = base_url.rstrip('/') self.api_base = f"{self.base_url}/api/v1" self.api_key = api_key diff --git a/tests/regression/test_scenario_5_custom_integration.py b/tests/regression/test_scenario_5_custom_integration.py index 658204d1..1ac4b756 100644 --- a/tests/regression/test_scenario_5_custom_integration.py +++ b/tests/regression/test_scenario_5_custom_integration.py @@ -1173,7 +1173,7 @@ async def run_tests(): print("\n✓ FastAPI integration test completed successfully!") print("\n To run the actual server:") print(" uvicorn :app --reload") - print(" Then access API docs at: http://localhost:8000/docs") + print(" Then access API docs at: http://localhost:8848/docs") except Exception as e: print(f" ⚠ Integration test error: {str(e)[:100]}") @@ -1185,18 +1185,18 @@ async def run_tests(): print("# To run the FastAPI server:") print("# 1. Save this app to a file (e.g., main.py)") print("# 2. Run: uvicorn main:app --reload") - print("# 3. Access API docs at: http://localhost:8000/docs") + print("# 3. Access API docs at: http://localhost:8848/docs") print("# 4. 
Test endpoints using the interactive API documentation") - # Stop any running server on port 8000 (if any) + # Stop any running server on port 8848 (if any) _print_step("Step 5 Cleanup: Stopping any running servers") try: import subprocess import os import signal - def stop_server_on_port(port: int = 8000): + def stop_server_on_port(port: int = 8848): """Stop any process listening on the specified port""" try: # Try to find processes using the port @@ -1265,22 +1265,22 @@ def stop_server_on_port(port: int = 8000): print(f" ⚠ Error checking port {port}: {str(e)[:100]}") return False - # Stop server on port 8000 - if stop_server_on_port(8000): - print("✓ Server processes on port 8000 have been stopped") + # Stop server on port 8848 + if stop_server_on_port(8848): + print("✓ Server processes on port 8848 have been stopped") else: # Verify port is free try: result = subprocess.run( - ['lsof', '-ti', ':8000'], + ['lsof', '-ti', ':8848'], capture_output=True, text=True, timeout=2 ) if result.returncode != 0 or not result.stdout.strip(): - print("✓ Port 8000 is free (no server running)") + print("✓ Port 8848 is free (no server running)") else: - print(" ⚠ Port 8000 may still be in use") + print(" ⚠ Port 8848 may still be in use") except (FileNotFoundError, subprocess.TimeoutExpired): print(" ⚠ Could not verify port status (lsof not available)") diff --git a/tests/unit/test_pyseekdb_default_embeddings.py b/tests/unit/test_pyseekdb_default_embeddings.py new file mode 100644 index 00000000..382045fd --- /dev/null +++ b/tests/unit/test_pyseekdb_default_embeddings.py @@ -0,0 +1,157 @@ +"""Tests for the built-in zero-config default embedder (issues #940/#941). + +The embedder wraps pyseekdb's ``DefaultEmbeddingFunction``. We mock that out so +the test never has to download an ONNX model — what we want to verify is that +PowerMem wires the default correctly and that ``MemoryConfig()`` no longer +requires an OPENAI key to be constructible. 
+"""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+# Import the submodule explicitly so ``pyseekdb_default`` is a resolved
+# attribute of the package before any test patches it. ``unittest.mock`` on
+# Python 3.11 does not auto-import the final submodule when resolving a dotted
+# patch target, so patch.object(pyseekdb_default, ...) is used, not a string.
+from powermem.integrations.embeddings import pyseekdb_default
+
+# ---------------------------------------------------------------------------
+# Embedder behaviour
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def mock_default_fn():
+    """Mock pyseekdb.DefaultEmbeddingFunction so no model is downloaded.
+
+    We also stub out ``_load_sentence_transformer_with_fallback``: the
+    embedder pre-warms a ``sentence_transformers`` model in ``__init__``
+    before creating the DefaultEmbeddingFunction. Without this stub the test
+    would import ``sentence_transformers`` (an optional extra) and hit the
+    network on a cache miss, defeating the point of mocking the embedder.
+    """
+    with (
+        patch(
+            "pyseekdb.client.embedding_function.DefaultEmbeddingFunction"
+        ) as mock_cls,
+        patch.object(pyseekdb_default, "_load_sentence_transformer_with_fallback"),
+    ):
+        instance = MagicMock()
+        # Return one 384-dim vector per input document, matching all-MiniLM-L6-v2.
+        instance.side_effect = lambda docs: [[0.1] * 384 for _ in docs]
+        mock_cls.return_value = instance
+        yield mock_cls
+
+
+def test_embed_returns_384_dim_vector(mock_default_fn):
+    from powermem.integrations.embeddings.pyseekdb_default import (
+        PyseekdbDefaultEmbedding,
+    )
+
+    embedder = PyseekdbDefaultEmbedding()
+    vec = embedder.embed("hello world")
+
+    assert isinstance(vec, list)
+    assert len(vec) == 384
+    mock_default_fn.assert_called_once()
+
+
+def test_embed_batch_returns_per_input_vectors(mock_default_fn):
+    from powermem.integrations.embeddings.pyseekdb_default import (
+        PyseekdbDefaultEmbedding,
+    )
+
+    embedder = PyseekdbDefaultEmbedding()
+    vectors = embedder.embed_batch(["a", "b", "c"])
+
+    assert len(vectors) == 3
+    assert all(len(v) == 384 for v in vectors)
+
+
+def test_embed_batch_empty_input_short_circuits(mock_default_fn):
+    from powermem.integrations.embeddings.pyseekdb_default import (
+        PyseekdbDefaultEmbedding,
+    )
+
+    embedder = PyseekdbDefaultEmbedding()
+
+    assert embedder.embed_batch([]) == []
+
+
+def test_embed_rejects_none(mock_default_fn):
+    from powermem.integrations.embeddings.pyseekdb_default import (
+        PyseekdbDefaultEmbedding,
+    )
+
+    embedder = PyseekdbDefaultEmbedding()
+
+    with pytest.raises(ValueError):
+        embedder.embed(None)
+
+
+def test_config_defaults_match_pyseekdb(mock_default_fn):
+    from powermem.integrations.embeddings.pyseekdb_default import (
+        PyseekdbDefaultEmbedding,
+    )
+
+    embedder = PyseekdbDefaultEmbedding()
+
+    assert embedder.config.model == "all-MiniLM-L6-v2"
+    assert embedder.config.embedding_dims == 384
+
+
+# ---------------------------------------------------------------------------
+# Provider registry / factory wiring
+# ---------------------------------------------------------------------------
+
+
+def test_default_provider_is_registered():
+    # Importing providers populates the registry via __pydantic_init_subclass__.
+    import powermem.integrations.embeddings.config.providers  # noqa: F401
+    from powermem.integrations.embeddings.config.base import BaseEmbedderConfig
+
+    assert BaseEmbedderConfig.has_provider("default")
+    assert (
+        BaseEmbedderConfig.get_provider_class_path("default")
+        == "powermem.integrations.embeddings.pyseekdb_default.PyseekdbDefaultEmbedding"
+    )
+
+
+def test_factory_resolves_default_provider(mock_default_fn):
+    from powermem.integrations.embeddings.config.providers import (
+        PyseekdbDefaultEmbeddingConfig,
+    )
+    from powermem.integrations.embeddings.factory import EmbedderFactory
+
+    embedder = EmbedderFactory.create(
+        "default", PyseekdbDefaultEmbeddingConfig(), vector_config=None
+    )
+
+    assert embedder.embed("hi")  # round-trip via factory
+
+
+# ---------------------------------------------------------------------------
+# Zero-config MemoryConfig (the main #941 acceptance criterion)
+# ---------------------------------------------------------------------------
+
+
+def test_memory_config_default_embedder_requires_no_api_key(monkeypatch):
+    """MemoryConfig() with no .env should pick the local default embedder."""
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    monkeypatch.delenv("EMBEDDING_API_KEY", raising=False)
+    monkeypatch.delenv("EMBEDDING_PROVIDER", raising=False)
+
+    from powermem.configs import MemoryConfig
+    from powermem.integrations.embeddings.config.providers import (
+        PyseekdbDefaultEmbeddingConfig,
+    )
+
+    cfg = MemoryConfig()
+
+    assert isinstance(cfg.embedder, PyseekdbDefaultEmbeddingConfig)
+    assert cfg.embedder.provider == "default"
+    assert cfg.embedder.api_key is None
+    assert cfg.embedder.embedding_dims == 384
diff --git a/tests/unit/test_seekdb_default_storage.py b/tests/unit/test_seekdb_default_storage.py
new file mode 100644
index 00000000..9cf4105e
--- /dev/null
+++ b/tests/unit/test_seekdb_default_storage.py
@@ -0,0 +1,45 @@
+"""Tests for the zero-config storage default.
+
+The OceanBase provider covers both deployment shapes: with no ``OCEANBASE_HOST``
+configured it boots embedded seekdb on disk; with ``OCEANBASE_HOST`` set it
+talks to a remote OceanBase cluster. There is intentionally no separate
+``seekdb`` database provider — seekdb is just how the OceanBase backend
+behaves in embedded mode.
+"""
+
+from __future__ import annotations
+
+
+def test_memory_config_default_storage_is_oceanbase_in_embedded_mode(monkeypatch):
+    """`MemoryConfig()` with no env vars defaults to the OceanBase provider
+    with an empty host — i.e. embedded seekdb on disk.
+    """
+    for var in ("DATABASE_PROVIDER", "OCEANBASE_HOST", "OCEANBASE_PATH"):
+        monkeypatch.delenv(var, raising=False)
+
+    from powermem.configs import MemoryConfig
+    from powermem.storage.config.oceanbase import OceanBaseConfig
+
+    cfg = MemoryConfig()
+
+    assert isinstance(cfg.vector_store, OceanBaseConfig)
+    assert cfg.vector_store.host == ""  # → embedded seekdb mode
+    assert cfg.vector_store.ob_path == "./seekdb_data"
+
+
+def test_database_settings_default_provider_is_oceanbase(monkeypatch):
+    monkeypatch.delenv("DATABASE_PROVIDER", raising=False)
+
+    from powermem.config_loader import DatabaseSettings
+
+    assert DatabaseSettings().provider == "oceanbase"
+
+
+def test_oceanbase_provider_picks_up_remote_host(monkeypatch):
+    """Setting OCEANBASE_HOST flips the same provider into remote mode."""
+    monkeypatch.setenv("OCEANBASE_HOST", "ob.example.com")
+
+    from powermem.storage.config.oceanbase import OceanBaseConfig
+
+    cfg = OceanBaseConfig()
+    assert cfg.host == "ob.example.com"