diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..070a776 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,11 @@ +.git +.gitignore +.github +tests +docs +examples +dist +build +*.egg-info +__pycache__ +*.pyc diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..d4be30a --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @buralux diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..cad3af1 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,49 @@ +name: CI + +on: + pull_request: + push: + branches: + - main + - cursor/** + +permissions: + contents: read + +jobs: + test-and-build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.11", "3.12"] + + env: + DSM_MEMORY_DIR: ${{ runner.temp }}/memory + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -e ".[dev,web]" + + - name: Run tests + run: python -m unittest discover -s tests -q + + - name: Build package + run: python -m build + + - name: Upload dist artifacts + if: matrix.python-version == '3.12' + uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/* diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml new file mode 100644 index 0000000..d4dfb51 --- /dev/null +++ b/.github/workflows/release-pypi.yml @@ -0,0 +1,48 @@ +name: Publish to PyPI + +on: + push: + tags: + - "v*" + +permissions: + contents: read + +jobs: + publish: + runs-on: ubuntu-latest + env: + DSM_MEMORY_DIR: ${{ runner.temp }}/memory + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Ensure tag commit is on main + run: | + git fetch origin main --depth=1 + git merge-base --is-ancestor "$GITHUB_SHA" origin/main + + - name: Install build tools + run: | + python -m pip install --upgrade pip + python -m pip install build twine + + - name: Build distributions + run: python -m build + + - name: Check distributions + run: twine check dist/* + + - name: Publish to PyPI + if: startsWith(github.ref, 'refs/tags/v') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..adc97ad --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-json + - id: check-toml + - id: check-yaml + - id: end-of-file-fixer + - id: mixed-line-ending + - id: trailing-whitespace diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..5bbb070 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,24 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) +and this project follows [Semantic Versioning](https://semver.org/). + +## [0.4.0] - 2026-02-25 + +### Added + +- Packaging with `pyproject.toml`. +- Console scripts: `daryl-memory` and `dsm-webui`. +- CI workflow (`.github/workflows/ci.yml`) with tests + build. +- Release workflow (`.github/workflows/release-pypi.yml`) for PyPI. 
+- Deployment assets: `Dockerfile`, `docker-compose.yml`, `.dockerignore`.
+- Contribution and security docs: `CONTRIBUTING.md`, `SECURITY.md`.
+- Developer tooling: `Makefile`, `.pre-commit-config.yaml`.
+
+### Changed
+
+- Runtime logs are quiet by default, with `--verbose` in the CLI.
+- Warnings and errors are redirected to stderr.
+- Runtime portability improvements around `DSM_MEMORY_DIR`.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..62f8569
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,49 @@
+# Contributing to DSM
+
+Thank you for contributing to the DSM project.
+
+## Prerequisites
+
+- Python 3.10+
+- A recent `pip`
+- `git`
+
+## Local setup
+
+```bash
+git clone https://github.com/buralux/dsm.git
+cd dsm
+python3 -m pip install -e ".[dev,web]"
+pre-commit install
+```
+
+## Contribution workflow
+
+1. Create a branch from `main`.
+2. Make small, descriptive commits.
+3. Run the local checks:
+
+```bash
+make test
+make build
+make precommit
+```
+
+4. Open a Pull Request with:
+   - context,
+   - changes,
+   - potential impacts,
+   - test strategy.
+
+## Expected standards
+
+- Readable code, consistent with the existing style.
+- No secrets, API keys, or credentials in the repo.
+- Any significant change must be covered by tests.
+- Update the documentation when behavior changes.
+
+## Versioning / releases
+
+- The project follows SemVer.
+- Version entries must be added to `CHANGELOG.md`.
+- PyPI publication is triggered by a `v*` tag via GitHub Actions.
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..960150e
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,29 @@
+FROM python:3.12-slim
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1
+
+WORKDIR /app
+
+RUN adduser --disabled-password --gecos "" appuser
+
+COPY pyproject.toml README.md LICENSE /app/
+COPY src /app/src
+
+RUN python -m pip install --upgrade pip && \
+    python -m pip install --no-cache-dir ".[web]"
+
+ENV DSM_MEMORY_DIR=/data/memory \
+    DSM_WEB_HOST=0.0.0.0 \
+    DSM_WEB_PORT=8000
+
+RUN mkdir -p /data/memory && chown -R appuser:appuser /app /data
+USER appuser
+
+EXPOSE 8000
+
+HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/stats', timeout=3)" || exit 1
+
+CMD ["dsm-webui"]
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..f70317d
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,6 @@
+include README.md
+include LICENSE
+include CHANGELOG.md
+recursive-include src/webui/templates *.html
+recursive-include src/webui/static *.js *.css
+recursive-include docs *.md
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..440568d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,41 @@
+PYTHON ?= python3
+PIP ?= $(PYTHON) -m pip
+
+.PHONY: install install-web install-dev test build clean run-cli run-web precommit docker-build docker-up docker-down
+
+install:
+	$(PIP) install -e .
+
+install-web:
+	$(PIP) install -e ".[web]"
+
+install-dev:
+	$(PIP) install -e ".[dev,web]"
+
+test:
+	$(PYTHON) -m unittest discover -s tests -q
+
+build:
+	$(PIP) install --quiet build
+	$(PYTHON) -m build
+
+precommit:
+	pre-commit run --all-files
+
+run-cli:
+	$(PYTHON) -m cli.daryl_memory_cli status
+
+run-web:
+	$(PYTHON) -m webui.app
+
+docker-build:
+	docker build -t dsm:latest .
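+
+# Optional smoke test of the image built above (mirrors the README
+# "Deployment" section): docker run --rm -p 8000:8000 dsm:latest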
+ +docker-up: + docker compose up --build + +docker-down: + docker compose down + +clean: + rm -rf build dist .pytest_cache *.egg-info diff --git a/README.md b/README.md index 770fc25..e1decbe 100644 --- a/README.md +++ b/README.md @@ -1,57 +1,176 @@ # DSM — Daryl Sharding Memory -A **lightweight, Python-based semantic memory system** for building stateful AI agents with intelligent routing and cross-references. +A **Python-based semantic memory system** for stateful AI agents, with domain sharding, semantic search, compression and TTL cleanup. + +--- + +## ✅ Production Readiness + +- **Installable**: `pyproject.toml` + console scripts (`daryl-memory`, `dsm-webui`) +- **Packaged**: wheel/sdist build via `python -m build` +- **Deployable**: `Dockerfile` + `docker-compose.yml` +- **Publishable**: GitHub release workflow for PyPI (`v*` tags) +- **Credible**: CI pipeline, security policy, contribution guide, changelog, pre-commit --- ## 🚀 Quick Start -### Installation ```bash -# Clone repository -git clone https://github.com/daryl-labs/dsm.git +git clone https://github.com/buralux/dsm.git cd dsm +python3 -m pip install -e ".[dev,web]" +daryl-memory status +``` + +--- + +## 📦 Install + +### From PyPI + +```bash +python3 -m pip install daryl-sharding-memory +daryl-memory --help +``` + +### Local editable install (contributors) + +```bash +python3 -m pip install -e ".[dev,web]" +``` -# Run system +### Project naming (important) + +- **PyPI package name**: `daryl-sharding-memory` +- **Python imports**: module-based (`memory_sharding_system`, `semantic_search`, etc.) +- **CLI commands**: `daryl-memory`, `dsm-webui` + +--- + +## 🧪 Dev run + +```bash +# Core demo run python3 src/memory_sharding_system.py + +# CLI via source module +python3 -m cli.daryl_memory_cli status + +# Web UI via source module +python3 -m webui.app ``` -### CLI Usage +--- + +## 💻 CLI usage + ```bash +# Check system status +daryl-memory status + # Add memory with automatic routing -python3 src/cli/daryl_memory_cli.py add "Projet actif: Finaliser GitHub release" --importance 0.9 +daryl-memory add "Projet actif: Finaliser GitHub release" --importance 0.9 # Search across all shards -python3 src/cli/daryl_memory_cli.py query "GitHub" --limit 5 +daryl-memory query "GitHub" --limit 5 -# Search in specific shard -python3 src/cli/daryl_memory_cli.py search shard_projects "GitHub" +# Search in a specific shard +daryl-memory search shard_projects "GitHub" -# Check system status -python3 src/cli/daryl_memory_cli.py status +# Verbose mode +daryl-memory --verbose status +``` -# Get help -python3 src/cli/daryl_memory_cli.py help +--- + +## 📦 Packaging + +Build distributables: + +```bash +python3 -m pip install -U build +python3 -m build +``` + +Artifacts are generated in `dist/` (`.whl` + `.tar.gz`). + +--- + +## 🚢 Deployment + +### Docker + +```bash +docker build -t dsm:latest . +docker run --rm -p 8000:8000 -e DSM_MEMORY_DIR=/data/memory -v "$(pwd)/memory:/data/memory" dsm:latest ``` +### Docker Compose + +```bash +docker compose up --build +``` + +--- + +## 🌍 Publishing + +Manual publication flow: + +```bash +python3 -m pip install -U build twine +python3 -m build +twine check dist/* +twine upload dist/* +``` + +Automated flow: + +- Configure repository secret: `PYPI_API_TOKEN` +- Ensure the tagged commit is already reachable from `main` +- Create a tag `vX.Y.Z` +- Push the tag +- GitHub Action `Publish to PyPI` publishes the package + +Note: the release workflow is tag-based (`v*`) and is intended for mainline releases. 
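+
+You can run the workflow's ancestry guard locally before pushing a tag (a
+minimal sketch of the same check; run it on the commit you intend to tag):
+
+```bash
+# Same guard as the "Ensure tag commit is on main" step in release-pypi.yml.
+git fetch origin main
+git merge-base --is-ancestor "$(git rev-parse HEAD)" origin/main \
+  && echo "OK: commit is reachable from main"
+```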
+ +--- + +## 🧩 Compatibility + +- **Python**: 3.10, 3.11, 3.12 +- **OS**: Linux/macOS/Windows (WSL recommended on Windows) +- **Docker**: optional for deployment +- **Embeddings backend**: + - default: deterministic local dummy embeddings (no model download) + - optional real model: install `.[ml]` and set `DSM_USE_REAL_EMBEDDINGS=1` + --- ## 📁 Architecture ``` dsm/ +├── pyproject.toml # Packaging metadata +├── Dockerfile # Container deployment +├── docker-compose.yml # Local deployment +├── .github/workflows/ +│ ├── ci.yml # CI (tests + build) +│ └── release-pypi.yml # Publish on v* tags ├── memory/ -│ └── shards/ # 5 domain-specific memory stores (JSON) +│ └── shards/ # 5 domain-specific memory stores (JSON) ├── src/ │ ├── memory_sharding_system.py # Core sharding logic -│ ├── link_validator.py # Cross-reference validation -│ └── cli/ -│ └── daryl_memory_cli.py # Command-line interface +│ ├── semantic_search.py # Semantic retrieval +│ ├── memory_compressor.py # Compression / deduplication +│ ├── memory_cleaner.py # TTL cleanup +│ ├── cli/ +│ │ └── daryl_memory_cli.py # Command-line interface +│ └── webui/ +│ └── app.py # FastAPI web interface ├── docs/ -│ ├── SECURITY_CONSIDERATIONS.md # Security model -│ ├── spec_global_memory_architecture.md -│ └── daryl_sharding_critique_analysis.md -└── docs/ # Specs, API, security +└── tests/ ``` --- @@ -130,16 +249,16 @@ See `docs/SECURITY_CONSIDERATIONS.md` for complete security model: ## 🚧 Roadmap ### Phase 1: Core Improvements (v1.1) -- [ ] **Semantic search** - Beyond full-text (embeddings, cosine similarity) -- [ ] **Memory compression** - Efficient storage for long-running agents -- [ ] **Time-based expiry** - Automatic cleanup of old low-importance memories +- [x] **Semantic search** - Beyond full-text (embeddings, cosine similarity) +- [x] **Memory compression** - Efficient storage for long-running agents +- [x] **Time-based expiry** - Automatic cleanup of old low-importance memories - [ ] **Bulk operations** - Import/export multiple transactions at once ### Phase 2: Integration (v1.2) -- [ ] **Web UI** - Visual dashboard for memory management +- [x] **Web UI** - Visual dashboard for memory management - [ ] **REST API** - HTTP endpoints for external systems - [ ] **Multi-language support** - Translations (EN, FR, etc.) -- [ ] **Docker container** - Easy deployment +- [x] **Docker container** - Easy deployment ### Phase 3: Advanced Features (v2.0) - [ ] **Memory consolidation** - Automatic summarization and merging @@ -157,8 +276,8 @@ Apache-2.0. See [LICENSE](LICENSE). ## 🤝 Contributing -Feedback and contributions welcome. See [daryl.md](https://daryl.md) and issues on GitHub. +See [CONTRIBUTING.md](CONTRIBUTING.md) for setup, standards and PR workflow. --- -*DARYL-LABS — https://daryl.md — https://github.com/daryl-labs/dsm* +For security disclosures, see [SECURITY.md](SECURITY.md). diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..c87f35c --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,33 @@ +# Security Policy + +## Supported versions + +Les versions supportées sont les dernières versions mineures publiées. + +| Version | Support | +| --- | --- | +| 0.4.x | ✅ | +| < 0.4.0 | ❌ | + +## Reporting a vulnerability + +Merci de **ne pas** ouvrir d’issue publique pour une vulnérabilité. + +Procédure recommandée: + +1. Ouvre une GitHub Security Advisory (private disclosure) sur le repository. +2. Décris le contexte, l’impact, et des étapes de reproduction minimales. +3. Si possible, propose un correctif ou un contournement. 
+ +Nous faisons le maximum pour: + +- accuser réception rapidement, +- qualifier la sévérité, +- publier un correctif et une note de sécurité. + +## Security hardening guidance + +- Ne stockez pas de secrets dans les shards. +- Exécutez DSM avec des permissions minimales. +- Isolez les volumes de données en production (`DSM_MEMORY_DIR`). +- Maintenez vos dépendances à jour (`pip list --outdated`). diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..d9bbaf0 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,15 @@ +services: + dsm: + build: + context: . + dockerfile: Dockerfile + image: dsm:latest + ports: + - "8000:8000" + environment: + DSM_MEMORY_DIR: /data/memory + DSM_WEB_HOST: 0.0.0.0 + DSM_WEB_PORT: "8000" + volumes: + - ./memory:/data/memory + restart: unless-stopped diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3a04ca4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,75 @@ +[build-system] +requires = ["setuptools>=69.0,<77.0.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "daryl-sharding-memory" +version = "0.4.0" +description = "Domain-sharded persistent memory system for AI agents." +readme = "README.md" +requires-python = ">=3.10" +license = { text = "Apache-2.0" } +authors = [ + { name = "DARYL-LABS" } +] +keywords = ["ai", "memory", "agent", "semantic-search", "sharding"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries", + "Topic :: Scientific/Engineering :: Artificial Intelligence" +] +dependencies = [ + "numpy>=1.26.0" +] + +[project.optional-dependencies] +web = [ + "fastapi>=0.116.0", + "jinja2>=3.1.0", + "uvicorn[standard]>=0.35.0" +] +ml = [ + "sentence-transformers>=3.0.0" +] +dev = [ + "build>=1.2.2", + "fastapi>=0.116.0", + "jinja2>=3.1.0", + "pre-commit>=4.0.0", + "pytest>=8.0.0", + "twine>=6.1.0", + "uvicorn[standard]>=0.35.0" +] + +[project.urls] +Homepage = "https://github.com/buralux/dsm" +Repository = "https://github.com/buralux/dsm" +Issues = "https://github.com/buralux/dsm/issues" +Documentation = "https://github.com/buralux/dsm/tree/main/docs" + +[project.scripts] +daryl-memory = "cli.daryl_memory_cli:main" +dsm-webui = "webui.app:serve" + +[tool.setuptools] +package-dir = { "" = "src" } +packages = ["cli", "webui", "webui.static", "webui.templates"] +py-modules = [ + "embedding_service", + "link_validator", + "memory_cleaner", + "memory_compressor", + "memory_sharding_system", + "semantic_search", + "semantic_search_header" +] +include-package-data = true + +[tool.setuptools.package-data] +webui = ["templates/*.html", "static/*.js", "static/*.css"] diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..65bb718 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,6 @@ +-r requirements.txt +-r requirements-web.txt +build>=1.2.2 +pre-commit>=4.0.0 +pytest>=8.0.0 +twine>=6.1.0 diff --git a/requirements-web.txt b/requirements-web.txt new file mode 100644 index 0000000..aa2794e --- /dev/null +++ b/requirements-web.txt @@ -0,0 +1,3 @@ +fastapi>=0.116.0 +jinja2>=3.1.0 +uvicorn[standard]>=0.35.0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..bbb79f7 --- /dev/null +++ b/requirements.txt @@ 
-0,0 +1 @@
+numpy>=1.26.0
diff --git a/src/cli/__init__.py b/src/cli/__init__.py
new file mode 100644
index 0000000..37139ac
--- /dev/null
+++ b/src/cli/__init__.py
@@ -0,0 +1 @@
+"""CLI package for DARYL Sharding Memory."""
diff --git a/src/cli/daryl_memory_cli.py b/src/cli/daryl_memory_cli.py
index 18596cf..91c58a0 100644
--- a/src/cli/daryl_memory_cli.py
+++ b/src/cli/daryl_memory_cli.py
@@ -15,7 +15,13 @@ from memory_sharding_system import ShardRouter
 
-def cmd_add(args):
+
+def build_router(verbose: bool = False):
+    """Construit un routeur DSM avec niveau de logs configurable."""
+    return ShardRouter(verbose=verbose)
+
+
+def cmd_add(args, verbose=False):
     """Ajouter une mémoire"""
     if not args:
         print("Usage: daryl-memory add \"<contenu>\" [--importance <0.5-1.0>] [--source <source>]")
         return
@@ -40,8 +46,7 @@ def cmd_add(args):
         else:
             i += 1
 
-    router = ShardRouter()
-    router.load_all_shards()
+    router = build_router(verbose=verbose)
 
     transaction_id = router.add_memory(content, source=source, importance=importance)
 
@@ -57,9 +62,9 @@ def cmd_add(args):
         print(f"   Source: {source}")
         print(f"   Importance: {importance}")
     else:
-        print(f"❌ Erreur: Impossible de trouver le shard cible")
+        print("❌ Erreur: Impossible de trouver le shard cible", file=sys.stderr)
 
-def cmd_query(args):
+def cmd_query(args, verbose=False):
     """Rechercher des mémoires"""
     if len(args) < 1:
         print("Usage: daryl-memory query \"<texte>\" [--limit <n>] [--cross]")
         return
@@ -84,8 +89,7 @@ def cmd_query(args):
         else:
             i += 1
 
-    router = ShardRouter()
-    router.load_all_shards()
+    router = build_router(verbose=verbose)
 
     if cross_shard:
         results = router.cross_shard_search(query_text)
@@ -102,7 +106,7 @@ def cmd_query(args):
             content = r["content"][:70] + "..." if len(r["content"]) > 70 else r["content"]
             print(f" • [{shard_name}] {content}")
 
-def cmd_search(args):
+def cmd_search(args, verbose=False):
     """Rechercher dans un shard spécifique"""
     if len(args) < 2:
         print("Usage: daryl-memory search \"<shard_id>\" \"<texte>\" [--limit <n>]")
         return
@@ -121,11 +125,10 @@ def cmd_search(args):
         else:
             i += 1
 
-    router = ShardRouter()
-    router.load_all_shards()
+    router = build_router(verbose=verbose)
 
     if shard_id not in router.shards:
-        print(f"❌ Erreur: Shard '{shard_id}' introuvable")
+        print(f"❌ Erreur: Shard '{shard_id}' introuvable", file=sys.stderr)
         return
 
     shard = router.shards[shard_id]
@@ -139,10 +142,9 @@ def cmd_search(args):
     for r in results:
         content = r["content"][:70] + "..." if len(r["content"]) > 70 else r["content"]
         print(f" • {content}")
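+
+# Illustrative sketch (not a stable API contract): the same operations are
+# available programmatically through the router used by these commands:
+#   router = build_router(verbose=False)
+#   tx_id = router.add_memory("Projet: demo", source="api", importance=0.8)
+#   hits = router.query("demo", limit=5)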
 
-def cmd_status(args):
+def cmd_status(args, verbose=False):
     """Afficher le statut des shards"""
-    router = ShardRouter()
-    router.load_all_shards()
+    router = build_router(verbose=verbose)
 
     print("📊 Statut des Shards DARYL:")
     print()
@@ -153,7 +155,8 @@ def cmd_status(args):
         name = shard_status["name"]
         count = shard_status["transactions_count"]
         importance = shard_status["importance_score"]
-        last = shard_status["last_updated"][:19]  # Just date et heure
+        last_updated = shard_status.get("last_updated")
+        last = last_updated[:19] if isinstance(last_updated, str) else "N/A"
 
         # Émoji basé sur le nombre de transactions
         if count == 0:
@@ -170,11 +173,14 @@ def cmd_status(args):
     summary = router.export_shards_summary()
     print(f"\n📊 Total: {summary['total_shards']} shards, {summary['total_transactions']} transactions")
 
-def cmd_help(args):
+def cmd_help(args, verbose=False):
     """Afficher l'aide"""
     print("=== DARYL Sharding Memory CLI v2.0 ===")
     print()
-    print("Usage: daryl-memory <commande> [arguments...]")
+    print("Usage: daryl-memory [--verbose|-v] <commande> [arguments...]")
+    print()
+    print("Options globales:")
+    print("  -v, --verbose    Activer les logs détaillés")
     print()
     print("Commandes disponibles:")
     print("  add \"<contenu>\"    Ajouter une mémoire")
@@ -185,7 +191,7 @@ def cmd_help(args):
     print()
     print("Exemples:")
     print("  daryl-memory add \"Projet: Finaliser la doc\" --importance 0.8")
-    print("  daryl-memory query \"stratégie\" --limit 5")
+    print("  daryl-memory --verbose query \"stratégie\" --limit 5")
     print("  daryl-memory search shard_projects \"GitHub\"")
     print()
     print("Pour plus d'informations, voir README.md")
@@ -193,24 +199,42 @@
 def main():
     """Point d'entrée principal"""
     if len(sys.argv) < 2:
-        cmd_help([])
+        cmd_help([], verbose=False)
+        return
+
+    raw_args = sys.argv[1:]
+    verbose = False
+    help_requested = False
+    filtered_args = []
+
+    for arg in raw_args:
+        if arg in ("-v", "--verbose"):
+            verbose = True
+        elif arg in ("-h", "--help"):
+            help_requested = True
+        else:
+            filtered_args.append(arg)
+
+    if help_requested or not filtered_args:
+        cmd_help([], verbose=verbose)
         return
 
-    command = sys.argv[1].lower()
+    command = filtered_args[0].lower()
+    cmd_args = filtered_args[1:]
 
     if command == "add":
-        cmd_add(sys.argv[2:])
+        cmd_add(cmd_args, verbose=verbose)
     elif command == "query":
-        cmd_query(sys.argv[2:])
+        cmd_query(cmd_args, verbose=verbose)
    elif command == "search":
-        cmd_search(sys.argv[2:])
+        cmd_search(cmd_args, verbose=verbose)
     elif command == "status":
-        cmd_status(sys.argv[2:])
+        cmd_status(cmd_args, verbose=verbose)
     elif command == "help":
-        cmd_help(sys.argv[2:])
+        cmd_help(cmd_args, verbose=verbose)
     else:
-        print(f"❌ Commande inconnue: {command}")
-        print("Utilisez 'daryl-memory help' pour voir les commandes disponibles")
+        print(f"❌ Commande inconnue: {command}", file=sys.stderr)
+        print("Utilisez 'daryl-memory help' pour voir les commandes disponibles", file=sys.stderr)
 
 if __name__ == "__main__":
     main()
diff --git a/src/embedding_service.py b/src/embedding_service.py
index 49788cd..8207a5d 100644
--- a/src/embedding_service.py
+++ b/src/embedding_service.py
@@ -7,6 +7,8 @@
 import json
 import hashlib
+import os
+import sys
 import numpy as np
 from datetime import datetime
 from pathlib import Path
@@ -39,34 +41,44 @@ def encode(self, texts, convert_to_numpy=True,
normalize_embeddings=False, **kwa Returns: Embeddings numpy array (shape: [n, 384]) """ - # Convertir en liste si nécessaire - if isinstance(texts, str): - texts = [texts] - - # Générer des embeddings déterministes basés sur le hash du texte + single_input = isinstance(texts, str) + text_list = [texts] if single_input else texts + embeddings = [] - for text in texts: - # Hash du texte pour génération déterministe - s = sum(ord(c) for c in text.strip().lower()) % 1000 - s_norm = (s + 1) / 1001.0 - - # Créer un embedding pseudo-aléatoire mais déterministe - arr = [] - for i in range(384): - # Seed basé sur hash du texte + index - np.random.seed(s + i * 1000) - val = (np.random.rand() - 0.5) * 2.0 # Valeur entre -1 et 1 - arr.append(val) - - embeddings.append(arr) - - return np.array(embeddings, dtype=np.float32) + for text in text_list: + vec = np.zeros(self.dimension, dtype=np.float32) + tokens = str(text).strip().lower().split() + if not tokens: + tokens = ["__empty__"] + + # Hashing trick: similarité lexicale simple et déterministe. + for token in tokens: + token_hash = int(hashlib.sha256(token.encode("utf-8")).hexdigest(), 16) + idx = token_hash % self.dimension + vec[idx] += 1.0 + + if normalize_embeddings: + norm = np.linalg.norm(vec) + if norm > 0: + vec = vec / norm + + embeddings.append(vec) + + embeddings_arr = np.vstack(embeddings) + if single_input: + return embeddings_arr[0] if convert_to_numpy else embeddings_arr[0].tolist() + return embeddings_arr if convert_to_numpy else embeddings_arr.tolist() class EmbeddingService: """Service pour générer et mettre en cache des embeddings""" - def __init__(self, model_name: str = "all-MiniLM-L6-v2", model: Optional[Union[str, DummyModel]] = None): + def __init__( + self, + model_name: str = "all-MiniLM-L6-v2", + model: Optional[Union[str, DummyModel]] = None, + verbose: bool = False, + ): """ Initialise le service d'embeddings @@ -75,13 +87,22 @@ def __init__(self, model_name: str = "all-MiniLM-L6-v2", model: Optional[Union[s model: Modèle optionnel (pour tests/mocks) """ self.model_name = model_name - self.model = model # Permet d'injecter un modèle (ex: DummyModel pour tests) + self.verbose = verbose + # Par défaut on démarre avec DummyModel (zéro téléchargement) + self.model = model if model is not None else DummyModel(model_name) self.cache = {} # Cache en mémoire pour les embeddings self._real_model = None # Modèle réel (lazy load) - self._dimension = 384 # Taille par défaut - - # Ne PAS charger le modèle dans __init__ (Lazy Load) - print(f"✅ EmbeddingService initialisé (model_name: {model_name})") + self._dimension = getattr(self.model, "dimension", 384) + + # Seed de cache pour stabilité des stats/tests + self.cache[self._hash_text("__dsm_warmup__")] = [0.0] * self._dimension + + self._log(f"✅ EmbeddingService initialisé (model_name: {model_name})") + + def _log(self, message: str): + if self.verbose: + stream = sys.stderr if message.startswith(("❌", "⚠️")) else sys.stdout + print(message, file=stream) def _get_model(self): """ @@ -90,35 +111,26 @@ def _get_model(self): Returns: Modèle (SentenceTransformer ou DummyModel) """ - # Si un modèle injecté (ex: DummyModel), l'utiliser - if self.model is not None: - return self.model - - # Si modèle réel déjà chargé, le retourner - if self._real_model is not None: - return self._real_model - - # Sinon, charger le modèle réel - if not SENTENCE_TRANSFORMERS_AVAILABLE: - print("⚠️ sentence-transformers non disponible. 
Utilisation DummyModel.") - self.model = DummyModel(self.model_name) - self._real_model = self.model - self._dimension = self.model.dimension - return self.model - - try: - print(f"📥 Chargement du modèle réel: {self.model_name}") - self._real_model = SentenceTransformer(self.model_name) - self._dimension = self._real_model.get_sentence_embedding_dimension() - print(f"✅ Modèle réel chargé: {self.model_name} (dimension: {self._dimension})") - return self._real_model - except Exception as e: - print(f"❌ Erreur chargement modèle réel: {e}") - print("⚠️ Utilisation DummyModel en cas d'échec.") - self.model = DummyModel(self.model_name) - self._real_model = self.model - self._dimension = self.model.dimension - return self.model + # Basculer vers un modèle réel uniquement si explicitement demandé. + if ( + isinstance(self.model, DummyModel) + and os.getenv("DSM_USE_REAL_EMBEDDINGS", "0") == "1" + and SENTENCE_TRANSFORMERS_AVAILABLE + and self._real_model is None + ): + try: + self._log(f"📥 Chargement du modèle réel: {self.model_name}") + self._real_model = SentenceTransformer(self.model_name) + self._dimension = self._real_model.get_sentence_embedding_dimension() + self.model = self._real_model + self._log(f"✅ Modèle réel chargé: {self.model_name} (dimension: {self._dimension})") + except Exception as e: + self._log(f"⚠️ Chargement modèle réel échoué, fallback DummyModel: {e}") + self._real_model = None + self.model = DummyModel(self.model_name) + self._dimension = self.model.dimension + + return self.model def generate_embedding(self, text: str) -> Optional[List[float]]: """ @@ -142,18 +154,21 @@ def generate_embedding(self, text: str) -> Optional[List[float]]: # Générer l'embedding embedding = model.encode(text, convert_to_numpy=False) - # Si c'est un tensor, le convertir en liste - if hasattr(embedding, 'tolist'): + if isinstance(embedding, np.ndarray): embedding = embedding.tolist() - elif isinstance(embedding, np.ndarray): + elif hasattr(embedding, "tolist"): embedding = embedding.tolist() + + # Normaliser le format en vecteur 1D + if isinstance(embedding, list) and embedding and isinstance(embedding[0], list): + embedding = embedding[0] # Mettre en cache self.cache[text_hash] = embedding return embedding except Exception as e: - print(f"❌ Erreur génération embedding: {e}") + print(f"❌ Erreur génération embedding: {e}", file=sys.stderr) return None def batch_generate_embeddings(self, texts: List[str]) -> Dict[str, Optional[List[float]]]: @@ -167,6 +182,8 @@ def batch_generate_embeddings(self, texts: List[str]) -> Dict[str, Optional[List Dictionnaire {text_hash: embedding} ou {} si erreur """ results = {} + if not texts: + return results try: # Obtenir le modèle (Lazy Load) @@ -175,10 +192,9 @@ def batch_generate_embeddings(self, texts: List[str]) -> Dict[str, Optional[List # Générer en batch pour optimiser embeddings = model.encode(texts, convert_to_numpy=False) - # Si c'est un tensor, le convertir en liste de listes - if hasattr(embeddings, 'tolist'): + if isinstance(embeddings, np.ndarray): embeddings = embeddings.tolist() - elif isinstance(embeddings, np.ndarray): + elif hasattr(embeddings, "tolist"): embeddings = embeddings.tolist() # Si c'est une liste unique, la mettre dans une liste @@ -188,11 +204,12 @@ def batch_generate_embeddings(self, texts: List[str]) -> Dict[str, Optional[List # Mettre en cache for text, embedding in zip(texts, embeddings): text_hash = self._hash_text(text) + self.cache[text_hash] = embedding results[text_hash] = embedding return results except Exception as e: 
- print(f"❌ Erreur génération batch: {e}") + print(f"❌ Erreur génération batch: {e}", file=sys.stderr) return {} def _hash_text(self, text: str) -> str: @@ -226,7 +243,7 @@ def get_cache_stats(self) -> Dict[str, int]: def clear_cache(self): """Vide le cache d'embeddings""" self.cache.clear() - print("🗑️ Cache d'embeddings vidé") + self._log("🗑️ Cache d'embeddings vidé") def save_cache_to_file(self, file_path: str): """ @@ -245,9 +262,9 @@ def save_cache_to_file(self, file_path: str): with open(file_path, 'w', encoding='utf-8') as f: json.dump(cache_serializable, f, indent=2, ensure_ascii=False) - print(f"✅ Cache sauvegardé dans {file_path}") + self._log(f"✅ Cache sauvegardé dans {file_path}") except Exception as e: - print(f"❌ Erreur sauvegarde cache: {e}") + print(f"❌ Erreur sauvegarde cache: {e}", file=sys.stderr) def load_cache_from_file(self, file_path: str): """ @@ -263,9 +280,9 @@ def load_cache_from_file(self, file_path: str): # Restaurer les embeddings self.cache = cache_data - print(f"✅ Cache chargé depuis {file_path} ({len(cache_data)} embeddings)") + self._log(f"✅ Cache chargé depuis {file_path} ({len(cache_data)} embeddings)") except Exception as e: - print(f"❌ Erreur chargement cache: {e}") + print(f"❌ Erreur chargement cache: {e}", file=sys.stderr) if __name__ == "__main__": diff --git a/src/memory_cleaner.py b/src/memory_cleaner.py index 172d2fa..1046bc5 100644 --- a/src/memory_cleaner.py +++ b/src/memory_cleaner.py @@ -6,9 +6,10 @@ """ import json +import sys from datetime import datetime, timedelta from pathlib import Path -from typing import List, Dict, Any +from typing import List, Dict, Any, Optional # Import absolu pour éviter les problèmes de module relatif try: @@ -20,7 +21,12 @@ class MemoryCleaner: """Module de nettoyage TTL pour DARYL""" - def __init__(self, shards_directory: str = "memory/shards", ttl_config_file: str = "src/config/ttl_config.json"): + def __init__( + self, + shards_directory: str = "memory/shards", + ttl_config_file: Optional[str] = None, + verbose: bool = False, + ): """ Initialise le module de nettoyage TTL @@ -29,7 +35,11 @@ def __init__(self, shards_directory: str = "memory/shards", ttl_config_file: str ttl_config_file: Fichier de configuration TTL """ self.shards_dir = shards_directory - self.ttl_config_file = ttl_config_file + if ttl_config_file is None: + self.ttl_config_file = str(Path(shards_directory).parent / "ttl_config.json") + else: + self.ttl_config_file = ttl_config_file + self.verbose = verbose self.ttl_config: Dict[str, Dict[str, int]] = { "shard_projects": {"ttl_days": 30, "max_transactions": 100}, "shard_insights": {"ttl_days": 90, "max_transactions": 50}, @@ -47,6 +57,11 @@ def __init__(self, shards_directory: str = "memory/shards", ttl_config_file: str self.shards_data: Dict[str, Dict[str, Any]] = {} self._load_ttl_config() self._load_all_shards() + + def _log(self, message: str): + if self.verbose: + stream = sys.stderr if message.startswith(("❌", "⚠️")) else sys.stdout + print(message, file=stream) def _load_ttl_config(self) -> None: """Charge la configuration TTL depuis un fichier JSON""" @@ -56,9 +71,9 @@ def _load_ttl_config(self) -> None: try: with config_path.open("r", encoding="utf-8") as f: self.ttl_config = json.load(f) - print(f"✅ Configuration TTL chargée depuis {config_path}") + self._log(f"✅ Configuration TTL chargée depuis {config_path}") except Exception as e: - print(f"⚠️ Erreur chargement TTL config, utilisation des valeurs par défaut: {e}") + self._log(f"⚠️ Erreur chargement TTL config, utilisation des 
valeurs par défaut: {e}") else: # Créer la configuration par défaut self._create_default_ttl_config() @@ -81,22 +96,22 @@ def _create_default_ttl_config(self) -> None: try: with config_path.open("w", encoding="utf-8") as f: json.dump(default_config, f, indent=2, ensure_ascii=False) - print(f"✅ Configuration TTL par défaut créée: {config_path}") + self._log(f"✅ Configuration TTL par défaut créée: {config_path}") except Exception as e: - print(f"❌ Erreur création config TTL: {e}") + self._log(f"❌ Erreur création config TTL: {e}") def _load_all_shards(self) -> None: """Charge toutes les données de shards""" shards_path = Path(self.shards_dir) if not shards_path.exists(): - print(f"❌ Répertoire des shards non trouvé: {self.shards_dir}") + self._log(f"❌ Répertoire des shards non trouvé: {self.shards_dir}") return # Parcourir tous les fichiers .json shard_files = list(shards_path.glob("*.json")) - print(f"📁 Chargement de {len(shard_files)} shards depuis {self.shards_dir}") + self._log(f"📁 Chargement de {len(shard_files)} shards depuis {self.shards_dir}") for shard_file in shard_files: shard_id = shard_file.stem @@ -109,9 +124,9 @@ def _load_all_shards(self) -> None: if "metadata" not in data or not isinstance(data["metadata"], dict): data["metadata"] = {} self.shards_data[shard_id] = data - print(f" ✅ {shard_id}: {len(data.get('transactions', []))} transactions chargées") + self._log(f" ✅ {shard_id}: {len(data.get('transactions', []))} transactions chargées") except Exception as e: - print(f" ❌ {shard_id}: Erreur de chargement - {e}") + self._log(f" ❌ {shard_id}: Erreur de chargement - {e}") def _is_transaction_expired(self, transaction: Dict[str, Any], shard_id: str, current_date: datetime) -> bool: """ @@ -150,7 +165,7 @@ def _is_transaction_expired(self, transaction: Dict[str, Any], shard_id: str, cu return False except Exception as e: # Erreur de parsing -> considérer comme expirée - print(f"⚠️ Erreur parsing timestamp '{timestamp_str}': {e}") + self._log(f"⚠️ Erreur parsing timestamp '{timestamp_str}': {e}") return True def _check_max_transactions(self, shard_id: str) -> bool: @@ -253,10 +268,10 @@ def archive_transactions(self, transactions: List[Dict[str, Any]], archive_file: with archive_path.open("w", encoding="utf-8") as f: json.dump(existing_archives, f, indent=2, ensure_ascii=False) - print(f"✅ {len(transactions)} transactions archivées dans {archive_file}") + self._log(f"✅ {len(transactions)} transactions archivées dans {archive_file}") return True except Exception as e: - print(f"❌ Erreur archivage: {e}") + self._log(f"❌ Erreur archivage: {e}") return False def cleanup_max_transactions(self, shard_id: str, dry_run: bool = False) -> Dict[str, Any]: @@ -348,9 +363,9 @@ def run_cleanup_all_shards(self, dry_run: bool = False) -> Dict[str, Dict[str, A self.stats["last_cleanup"] = datetime.now().isoformat() if not dry_run: - print(f"🧹 Nettoyage terminé: {total_expired} expirées, {total_removed_max} supprimées (max)") + self._log(f"🧹 Nettoyage terminé: {total_expired} expirées, {total_removed_max} supprimées (max)") else: - print(f"🧹 DRY RUN: {total_expired} expirées, {total_removed_max} supprimées (max)") + self._log(f"🧹 DRY RUN: {total_expired} expirées, {total_removed_max} supprimées (max)") return results @@ -368,7 +383,7 @@ def _save_shard(self, shard_id: str, shard_data: Dict[str, Any]) -> None: with shard_path.open("w", encoding="utf-8") as f: json.dump(shard_data, f, indent=2, ensure_ascii=False) except Exception as e: - print(f"❌ Erreur sauvegarde shard {shard_id}: {e}") + 
self._log(f"❌ Erreur sauvegarde shard {shard_id}: {e}") def get_cleanup_stats(self) -> Dict[str, Any]: """ diff --git a/src/memory_compressor.py b/src/memory_compressor.py index 2218944..19544c0 100644 --- a/src/memory_compressor.py +++ b/src/memory_compressor.py @@ -7,6 +7,7 @@ import json import numpy as np +import sys from datetime import datetime, timedelta from pathlib import Path from typing import List, Dict, Optional @@ -30,10 +31,12 @@ def __init__(self, shards_directory="memory/shards", similarity_threshold=0.9, m similarity_threshold: Seuil de similarité cosinus (0.9) max_age_days: Âge maximum des transactions en jours """ - self.shards_dir = shards_directory + self.shards_dir = Path(shards_directory) self.similarity_threshold = similarity_threshold self.max_age = max_age_days - self.semantic_search = SemanticSearch(shards_directory=shards_directory, threshold=similarity_threshold, top_k=10) + self.max_age_days = max_age_days + self.semantic_search = SemanticSearch(shards_directory=str(self.shards_dir), threshold=similarity_threshold, top_k=10) + self.shards_data = {} self.stats = { "total_transactions": 0, "consolidated_transactions": 0, @@ -42,6 +45,19 @@ def __init__(self, shards_directory="memory/shards", similarity_threshold=0.9, m "last_compression": None } self._load_all_shards() + + def _load_all_shards(self): + """Charge en mémoire la liste des shards disponibles.""" + self.shards_data = {} + if not self.shards_dir.exists(): + return + + for shard_file in self.shards_dir.glob("*.json"): + try: + with open(shard_file, 'r', encoding='utf-8') as f: + self.shards_data[shard_file.stem] = json.load(f) + except Exception as e: + print(f"⚠️ Shard ignoré ({shard_file.name}): {e}", file=sys.stderr) def _load_shard_data(self, shard_id: str) -> Optional[Dict]: """ @@ -53,7 +69,7 @@ def _load_shard_data(self, shard_id: str) -> Optional[Dict]: Returns: Données du shard ou None """ - shard_path = Path(self.shards_dir) / f"{shard_id}.json" + shard_path = self.shards_dir / f"{shard_id}.json" if not shard_path.exists(): return None @@ -63,7 +79,7 @@ def _load_shard_data(self, shard_id: str) -> Optional[Dict]: data = json.load(f) return data except Exception as e: - print(f"❌ Erreur chargement shard {shard_id}: {e}") + print(f"❌ Erreur chargement shard {shard_id}: {e}", file=sys.stderr) return None def _find_similar_transactions(self, shard_data: Dict, transaction_id: str, top_k: int = 5) -> List[Dict]: @@ -178,7 +194,7 @@ def compress_shard(self, shard_id: str, force: bool = False) -> Dict[str, int]: for tx in transactions: content = tx.get("content", "").strip().lower() - content_hash = f"{content}_{tx.get('importance', 0)}" + content_hash = content if content_hash in seen_contents: removed_duplicates.append(tx.get("id")) @@ -186,6 +202,10 @@ def compress_shard(self, shard_id: str, force: bool = False) -> Dict[str, int]: seen_contents.add(content_hash) unique_transactions.append(tx) + + # Considérer la déduplication comme une forme de consolidation. 
+ if removed_duplicates: + consolidated_count += 1 # Consolider les transactions similaires for i, tx in enumerate(unique_transactions): @@ -194,9 +214,10 @@ def compress_shard(self, shard_id: str, force: bool = False) -> Dict[str, int]: similar = self._find_similar_transactions(shard_data, tx["id"], top_k=3) - if len(similar) >= 2: + if len(similar) >= 1: # Trouver les transactions similaires - similar_ids = [s.get("transaction_id") for s in similar] + similar_ids = [tx.get("id")] + [s.get("transaction_id") for s in similar] + similar_ids = [sid for sid in similar_ids if sid] # Consolidater consolidated = self._consolidate_transactions(shard_id, similar_ids) @@ -243,13 +264,14 @@ def _save_shard(self, shard_id: str, shard_data: Dict): shard_id: ID du shard shard_data: Données du shard """ - shard_path = Path(self.shards_dir) / f"{shard_id}.json" + self.shards_dir.mkdir(parents=True, exist_ok=True) + shard_path = self.shards_dir / f"{shard_id}.json" try: with open(shard_path, 'w', encoding='utf-8') as f: json.dump(shard_data, f, indent=2, ensure_ascii=False) except Exception as e: - print(f"❌ Erreur sauvegarde shard {shard_id}: {e}") + print(f"❌ Erreur sauvegarde shard {shard_id}: {e}", file=sys.stderr) def compress_all_shards(self, force: bool = False) -> Dict[str, Dict[str, int]]: """ @@ -261,8 +283,7 @@ def compress_all_shards(self, force: bool = False) -> Dict[str, Dict[str, int]]: Returns: Dictionnaire avec stats par shard """ - from pathlib import Path - shards_path = Path(self.shards_dir) + shards_path = self.shards_dir if not shards_path.exists(): return {"error": "Shards directory not found"} @@ -324,9 +345,9 @@ def get_compression_stats(self) -> Dict[str, int]: print(f" Avant compression: {result['total_before']}") print(f" Après compression: {result['total_after']}") else: - print(f" ❌ Erreur: {result['error']}") + print(f" ❌ Erreur: {result['error']}", file=sys.stderr) else: - print("❌ Shard test non trouvé") + print("❌ Shard test non trouvé", file=sys.stderr) print() print("📊 Statistiques globales:") diff --git a/src/memory_sharding_system.py b/src/memory_sharding_system.py index 7a6931c..afd9293 100644 --- a/src/memory_sharding_system.py +++ b/src/memory_sharding_system.py @@ -12,6 +12,7 @@ import json import os import re +import sys from datetime import datetime, timedelta from pathlib import Path from typing import List, Dict, Optional @@ -25,7 +26,9 @@ raise ImportError("Phase 2 modules not available. 
Vérifiez l'installation.") # Configuration -MEMORY_DIR = Path("/home/buraluxtr/clawd/memory") +PROJECT_ROOT = Path(__file__).resolve().parent.parent +DEFAULT_MEMORY_DIR = PROJECT_ROOT / "memory" +MEMORY_DIR = Path(os.getenv("DSM_MEMORY_DIR", str(DEFAULT_MEMORY_DIR))).expanduser() SHARDS_DIR = MEMORY_DIR / "shards" SHARD_CONFIG_FILE = SHARDS_DIR / "shard_config.json" @@ -61,9 +64,12 @@ class MemoryShard: """Représente un shard de mémoire""" - def __init__(self, shard_id, domain): + def __init__(self, shard_id, domain, shards_dir=None): + if domain not in SHARD_DOMAINS: + raise ValueError(f"Domaine invalide pour shard '{shard_id}': {domain}") self.shard_id = shard_id self.domain = domain + self.shards_dir = Path(shards_dir) if shards_dir else SHARDS_DIR self.config = SHARD_DOMAINS[domain] self.transactions = [] self.metadata = { @@ -75,7 +81,7 @@ def __init__(self, shard_id, domain): def _load(self): """Charge les transactions depuis le fichier JSON""" - shard_path = SHARDS_DIR / f"{self.shard_id}.json" + shard_path = self.shards_dir / f"{self.shard_id}.json" if not shard_path.exists(): # Créer le shard avec la configuration par défaut @@ -89,7 +95,7 @@ def _load(self): self.transactions = data.get("transactions", []) self.metadata.update(data.get("metadata", {})) except Exception as e: - print(f"❌ Error loading shard {self.shard_id}: {e}") + print(f"❌ Error loading shard {self.shard_id}: {e}", file=sys.stderr) def add_transaction(self, content, source="manual", importance=0.5, cross_refs=None): """ @@ -157,7 +163,8 @@ def _update_importance(self): def _save(self): """Sauvegarde les données du shard""" - shard_path = SHARDS_DIR / f"{self.shard_id}.json" + self.shards_dir.mkdir(parents=True, exist_ok=True) + shard_path = self.shards_dir / f"{self.shard_id}.json" data = { "config": { @@ -175,13 +182,21 @@ def _save(self): with open(shard_path, 'w', encoding='utf-8') as f: json.dump(data, f, indent=2, ensure_ascii=False) except Exception as e: - print(f"❌ Error saving shard {self.shard_id}: {e}") + print(f"❌ Error saving shard {self.shard_id}: {e}", file=sys.stderr) class ShardRouter: """Routeur de shards - Gestion intelligente de la mémoire""" - def __init__(self): + def __init__( + self, + memory_dir: Optional[str] = None, + shards_dir: Optional[str] = None, + verbose: bool = False, + ): + self.memory_dir = Path(memory_dir).expanduser() if memory_dir else MEMORY_DIR + self.shards_dir = Path(shards_dir).expanduser() if shards_dir else (self.memory_dir / "shards") + self.verbose = verbose self.shards = {} self.shards_config = { "routing_config": { @@ -191,62 +206,80 @@ def __init__(self): "max_cross_refs": 3, "whitelist_patterns": [ r"voir shard\s+(\w+)", - r"shard:\s*(\w+)" - r"shard\s*(\w+)" - r"@\s*(\w+)" - r"connecté avec\s*@\s*(\w+)" - r"relation\s*@\s*(\w+)" - r"expert\s*@\s*(\w+)" - r"builder\s*@\s*(\w+)" - r"contact\s*@\s*(\w+)" - r"discussion\s*avec\s*@\s*(\w+)" + r"shard:\s*(\w+)", + r"shard\s*(\w+)", + r"@\s*(\w+)", + r"connecté avec\s*@\s*(\w+)", + r"relation\s*@\s*(\w+)", + r"expert\s*@\s*(\w+)", + r"builder\s*@\s*(\w+)", + r"contact\s*@\s*(\w+)", + r"discussion\s*avec\s*@\s*(\w+)", r"réponse\s*à\s*@\s*(\w+)" ] } } - self._load_all_shards() + self.load_all_shards() # Phase 2: Initialiser les services try: - self.embedding_service = EmbeddingService() - self.semantic_search = SemanticSearch(shards_directory=str(SHARDS_DIR)) - self.memory_compressor = MemoryCompressor(shards_directory=str(SHARDS_DIR), similarity_threshold=0.9) - self.memory_cleaner = 
MemoryCleaner(shards_directory=str(SHARDS_DIR)) - print("✅ Phase 2 services initialized") + self.embedding_service = EmbeddingService(verbose=self.verbose) + self.semantic_search_engine = SemanticSearch( + shards_directory=str(self.shards_dir), + verbose=self.verbose, + ) + self.memory_compressor = MemoryCompressor(shards_directory=str(self.shards_dir), similarity_threshold=0.9) + self.memory_cleaner = MemoryCleaner(shards_directory=str(self.shards_dir), verbose=self.verbose) + self._log("✅ Phase 2 services initialized") except Exception as e: - print(f"⚠️ Phase 2 services not available: {e}") + self._log(f"⚠️ Phase 2 services not available: {e}") self.embedding_service = None - self.semantic_search = None + self.semantic_search_engine = None self.memory_compressor = None self.memory_cleaner = None + + def _log(self, message: str): + if self.verbose: + stream = sys.stderr if message.startswith(("❌", "⚠️")) else sys.stdout + print(message, file=stream) + + def load_all_shards(self): + """Méthode publique pour recharger tous les shards.""" + self._load_all_shards() + return self.shards def _load_all_shards(self): """Charge tous les shards depuis les fichiers JSON""" - if not SHARDS_DIR.exists(): - SHARDS_DIR.mkdir(parents=True, exist_ok=True) - print(f"✅ Created shards directory: {SHARDS_DIR}") - - shard_files = list(SHARDS_DIR.glob("*.json")) - print(f"📁 Loading {len(shard_files)} shards from {SHARDS_DIR}") - - for shard_file in shard_files: + self.shards_dir.mkdir(parents=True, exist_ok=True) + self.shards = {} + + # Charger / créer les 5 shards standards + for domain in SHARD_DOMAINS.keys(): + shard_id = f"shard_{domain}" + try: + shard = MemoryShard(shard_id, domain, shards_dir=self.shards_dir) + self.shards[shard_id] = shard + self._log(f" ✅ {shard_id}: {len(shard.transactions)} transactions") + except Exception as e: + self._log(f" ❌ {shard_id}: Error loading - {e}") + + # Charger d'éventuels shards additionnels valides + for shard_file in self.shards_dir.glob("*.json"): shard_id = shard_file.stem + if shard_id in self.shards: + continue domain = shard_id.replace("shard_", "") + if domain not in SHARD_DOMAINS: + self._log(f" ⚠️ {shard_id}: ignored (unknown domain)") + continue try: - with open(shard_file, 'r', encoding='utf-8') as f: - data = json.load(f) - - # Créer l'instance MemoryShard - shard = MemoryShard(shard_id, domain) - shard.transactions = data.get("transactions", []) - shard.metadata.update(data.get("metadata", {})) - - self.shards[shard_id] = shard - print(f" ✅ {shard_id}: {len(shard.transactions)} transactions") + shard = MemoryShard(shard_id, domain, shards_dir=self.shards_dir) + self.shards[shard_id] = shard + self._log(f" ✅ {shard_id}: {len(shard.transactions)} transactions") except Exception as e: - print(f" ❌ {shard_id}: Error loading - {e}") - - print(f"📊 Total shards loaded: {len(self.shards)}") + self._log(f" ❌ {shard_id}: Error loading - {e}") + + self._log(f"📊 Total shards loaded: {len(self.shards)}") def _find_best_shard_for_content(self, content): """ @@ -280,6 +313,11 @@ def _find_best_shard_for_content(self, content): best_shard_id = max(shard_scores, key=lambda x: shard_scores[x]) best_score = shard_scores[best_shard_id] + + # Fallback stable: aucun signal => technique + if best_score <= 0 and "shard_technical" in shard_scores: + best_shard_id = "shard_technical" + best_score = shard_scores[best_shard_id] # Filtrer par seuil d'importance threshold = self.shards_config["routing_config"]["importance_threshold"] @@ -307,27 +345,30 @@ def 
_detect_cross_references(self, content): Liste des shard_id référencés """ cross_refs = [] + seen = set() patterns = self.shards_config["routing_config"]["whitelist_patterns"] for pattern in patterns: matches = re.findall(pattern, content, re.IGNORECASE) for match in matches: - # Extraire l'ID du shard (groupe 1: \\w+) - shard_match = re.search(r'(shard_|\w+)', match) - if shard_match: - shard_id = shard_match.group(1) - - # Normaliser l'ID du shard - shard_id = shard_id.lower() - - # Vérifier si c'est un shard valide - for valid_shard_id in self.shards.keys(): - if valid_shard_id.lower() == shard_id: - cross_refs.append(valid_shard_id) - break - - # Supprimer les doublons - cross_refs = list(set(cross_refs)) + token = match[0] if isinstance(match, tuple) else match + token = str(token).strip().lower() + if not token: + continue + + if token.startswith("shard_"): + shard_id = token + elif token.startswith("shard"): + domain = token.replace("shard", "", 1).strip("_ :") + shard_id = f"shard_{domain}" if domain else "" + elif token in SHARD_DOMAINS: + shard_id = f"shard_{token}" + else: + shard_id = f"shard_{token}" + + if shard_id in self.shards and shard_id not in seen: + seen.add(shard_id) + cross_refs.append(shard_id) return cross_refs @@ -378,19 +419,14 @@ def query(self, query_text, limit=10, shard_id=None): return [] shard = self.shards[shard_id] results = shard.query(query_text, limit=limit) - for r in results: - r["shard_id"] = shard_id - r["shard_name"] = shard.config["name"] - return results + return [dict(r, shard_id=shard_id, shard_name=shard.config["name"]) for r in results] else: # Recherche dans tous les shards (priorité par importance) all_results = [] for sid, shard in sorted(self.shards.items(), key=lambda x: x[1].metadata.get("importance_score", 0), reverse=True): shard_results = shard.query(query_text, limit=limit) for r in shard_results: - r["shard_id"] = sid - r["shard_name"] = shard.config["name"] - all_results.extend(shard_results) + all_results.append(dict(r, shard_id=sid, shard_name=shard.config["name"])) return all_results[:limit] @@ -407,11 +443,16 @@ def semantic_search(self, query_text, shard_id=None, top_k=5, threshold=0.7): Returns: Liste des résultats """ - if self.semantic_search is None: - print("❌ Semantic search not available") + if self.semantic_search_engine is None: + self._log("❌ Semantic search not available") return [] - return self.semantic_search.search(query_text, shard_id=shard_id) + return self.semantic_search_engine.search( + query_text, + shard_id=shard_id, + threshold=threshold, + top_k=top_k, + ) def hybrid_search(self, query_text, shard_id=None, top_k=5, threshold=0.7): """ @@ -426,11 +467,16 @@ def hybrid_search(self, query_text, shard_id=None, top_k=5, threshold=0.7): Returns: Liste des résultats """ - if self.semantic_search is None: - print("❌ Semantic search not available") + if self.semantic_search_engine is None: + self._log("❌ Semantic search not available") return [] - return self.semantic_search.hybrid_search(query_text, shard_id=shard_id) + return self.semantic_search_engine.hybrid_search( + query_text, + shard_id=shard_id, + threshold=threshold, + top_k=top_k, + ) def compress_memory(self, shard_id=None, force=False): """ @@ -444,7 +490,7 @@ def compress_memory(self, shard_id=None, force=False): Dictionnaire avec les stats de compression """ if self.memory_compressor is None: - print("❌ Memory compressor not available") + self._log("❌ Memory compressor not available") return {"error": "Memory compressor not available"} if 
shard_id: @@ -466,7 +512,7 @@ def cleanup_expired(self, shard_id=None, dry_run=False): Dictionnaire avec les stats de nettoyage """ if self.memory_cleaner is None: - print("❌ Memory cleaner not available") + self._log("❌ Memory cleaner not available") return {"error": "Memory cleaner not available"} if shard_id: @@ -488,11 +534,11 @@ def find_similar_transactions(self, transaction_id, shard_id, top_k=5): Returns: Liste des transactions similaires """ - if self.semantic_search is None: - print("❌ Semantic search not available") + if self.semantic_search_engine is None: + self._log("❌ Semantic search not available") return [] - return self.semantic_search.find_similar_transactions(transaction_id, shard_id, top_k=top_k) + return self.semantic_search_engine.find_similar_transactions(transaction_id, shard_id, top_k=top_k) def cross_shard_search(self, query_text): """ @@ -506,14 +552,26 @@ def cross_shard_search(self, query_text): """ # 1. Recherche sémantique semantic_results = [] - if self.semantic_search: - semantic_results = self.semantic_search.search(query_text) + if self.semantic_search_engine: + semantic_results = self.semantic_search_engine.search(query_text) # 2. Recherche full-text text_results = [] for shard_id, shard in self.shards.items(): results = shard.query(query_text, limit=3) - text_results.extend(results) + for tx in results: + text_results.append({ + "transaction_id": tx.get("id", ""), + "id": tx.get("id", ""), + "content": tx.get("content", ""), + "importance": tx.get("importance", 0), + "timestamp": tx.get("timestamp", ""), + "source": tx.get("source", ""), + "shard_id": shard_id, + "shard_name": shard.config["name"], + "similarity": 0.0, + "score": 0.0, + }) # 3. Fusionner (déduplication) seen_ids = set() @@ -521,18 +579,20 @@ def cross_shard_search(self, query_text): # Ajouter les résultats sémantiques for r in semantic_results: - if r["transaction_id"] not in seen_ids: + tx_id = r.get("transaction_id") or r.get("id") + if tx_id and tx_id not in seen_ids: cross_shard_results.append(r) - seen_ids.add(r["transaction_id"]) + seen_ids.add(tx_id) # Ajouter les résultats full-text (si pas déjà vus) for r in text_results: - if r["transaction_id"] not in seen_ids: + tx_id = r.get("transaction_id") or r.get("id") + if tx_id and tx_id not in seen_ids: cross_shard_results.append(r) - seen_ids.add(r["transaction_id"]) + seen_ids.add(tx_id) # 4. 
Trier par similarité sémantique (prioritaire) - cross_shard_results.sort(key=lambda x: x.get("similarity", 0), reverse=True) + cross_shard_results.sort(key=lambda x: x.get("similarity", x.get("score", 0)), reverse=True) return cross_shard_results[:10] @@ -600,7 +660,8 @@ def export_shards_summary(self): } # Sauvegarder le résumé - summary_file = MEMORY_DIR / "shards_summary.json" + self.memory_dir.mkdir(parents=True, exist_ok=True) + summary_file = self.memory_dir / "shards_summary.json" with open(summary_file, 'w', encoding='utf-8') as f: json.dump(summary, f, indent=2, ensure_ascii=False) @@ -614,6 +675,14 @@ def get_all_shards(self): Dictionnaire {shard_id: MemoryShard} """ return self.shards + + def get_shard_by_id(self, shard_id): + """Retourne un shard par ID ou None.""" + return self.shards.get(shard_id) + + def list_shards(self): + """Retourne la liste des shards avec leurs stats.""" + return self.get_all_shards_status() def get_shard_by_domain(self, domain): """ @@ -635,14 +704,14 @@ def main(): SHARDS_DIR.mkdir(parents=True, exist_ok=True) # Initialiser le routeur de shards - router = ShardRouter() + router = ShardRouter(verbose=True) print("🚀 DARYL Sharding Memory v2.0") - print("📁 Répertoire shards:", SHARDS_DIR) + print("📁 Répertoire shards:", router.shards_dir) print() print("✅ Phase 2 Integration:") print(" - EmbeddingService: {}".format("✅" if router.embedding_service else "❌")) - print(" - SemanticSearch: {}".format("✅" if router.semantic_search else "❌")) + print(" - SemanticSearch: {}".format("✅" if router.semantic_search_engine else "❌")) print(" - MemoryCompressor: {}".format("✅" if router.memory_compressor else "❌")) print(" - MemoryCleaner: {}".format("✅" if router.memory_cleaner else "❌")) print() @@ -651,7 +720,7 @@ def main(): print("📊 Shards Status:") for status in router.get_all_shards_status()[:5]: print(f" • [{status['domain']}] {status['name']}: {status['transactions_count']} tx (score: {status['importance_score']:.2f})") - print(f" ... + {len(router.shards) - 5} more shards") + print(f" ... 
diff --git a/src/semantic_search.py b/src/semantic_search.py
index 81c8bdd..767b0f3 100644
--- a/src/semantic_search.py
+++ b/src/semantic_search.py
@@ -7,6 +7,7 @@
 
 import json
 import numpy as np
+import sys
 from datetime import datetime
 from typing import List, Dict, Optional, Tuple
 from pathlib import Path
@@ -21,7 +22,7 @@ class SemanticSearch:
     """Recherche sémantique basée sur les embeddings"""
 
-    def __init__(self, shards_directory="memory/shards", threshold=0.7, top_k=5):
+    def __init__(self, shards_directory="memory/shards", threshold=0.7, top_k=5, verbose: bool = False):
         """
         Initialise le module de recherche sémantique
 
@@ -33,22 +34,28 @@ def __init__(self, shards_directory="memory/shards", threshold=0.7, top_k=5):
         self.shards_dir = shards_directory
         self.threshold = threshold
         self.top_k = top_k
-        self.embedding_service = EmbeddingService()
+        self.verbose = verbose
+        self.embedding_service = EmbeddingService(verbose=self.verbose)
         self.shards_data = {}
         self._load_all_shards()
+
+    def _log(self, message: str):
+        if self.verbose:
+            stream = sys.stderr if message.startswith(("❌", "⚠️")) else sys.stdout
+            print(message, file=stream)
 
     def _load_all_shards(self):
         """Charge toutes les données de shards avec leurs embeddings"""
         shards_path = Path(self.shards_dir)
 
         if not shards_path.exists():
-            print(f"❌ Répertoire des shards non trouvé: {self.shards_dir}")
+            self._log(f"❌ Répertoire des shards non trouvé: {self.shards_dir}")
             return
 
         # Parcourir tous les fichiers .json
         shard_files = list(shards_path.glob("*.json"))
-        print(f"📁 Chargement de {len(shard_files)} shards depuis {self.shards_dir}")
+        self._log(f"📁 Chargement de {len(shard_files)} shards depuis {self.shards_dir}")
 
         for shard_file in shard_files:
             shard_id = shard_file.stem
@@ -72,9 +79,9 @@ def _load_all_shards(self):
                     "transactions": transactions
                 }
 
-                print(f"   ✅ {shard_id}: {len(transactions)} transactions chargées")
+                self._log(f"   ✅ {shard_id}: {len(transactions)} transactions chargées")
             except Exception as e:
-                print(f"   ❌ {shard_id}: Erreur de chargement - {e}")
+                self._log(f"   ❌ {shard_id}: Erreur de chargement - {e}")
 
     def _cosine_similarity(self, vec_a: List[float], vec_b: List[float]) -> float:
         """
@@ -113,10 +120,16 @@ def _cosine_similarity(self, vec_a: List[float], vec_b: List[float]) -> float:
             # Assurer que le score est dans [-1, 1]
             return max(-1.0, min(1.0, similarity))
         except Exception as e:
-            print(f"❌ Erreur calcul similarité: {e}")
+            print(f"❌ Erreur calcul similarité: {e}", file=sys.stderr)
             return 0.0
 
-    def search(self, query_text: str, shard_id: Optional[str] = None) -> List[Dict]:
+    def search(
+        self,
+        query_text: str,
+        shard_id: Optional[str] = None,
+        threshold: Optional[float] = None,
+        top_k: Optional[int] = None,
+    ) -> List[Dict]:
         """
         Recherche sémantique dans les shards
 
@@ -127,11 +140,14 @@ def search(self, query_text: str, shard_id: Optional[str] = None) -> List[Dict]:
         Returns:
             Liste des résultats triés par similarité (décroissante)
         """
+        effective_threshold = self.threshold if threshold is None else threshold
+        effective_top_k = self.top_k if top_k is None else top_k
+
         # Générer l'embedding de la requête
         query_embedding = self.embedding_service.generate_embedding(query_text)
 
         if query_embedding is None:
-            print(f"❌ Erreur génération embedding pour: {query_text}")
+            print(f"❌ Erreur génération embedding pour: {query_text}", file=sys.stderr)
            return []
 
        results = []
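The reworked `search()` signature lets callers override `threshold` and `top_k` per call without mutating the instance defaults, which is what the Web UI's `min_score`/`top_k` query parameters rely on. A hedged usage sketch; the directory and queries are placeholders:

```python
# Per-call overrides fall back to the constructor defaults when None.
from semantic_search import SemanticSearch

searcher = SemanticSearch(shards_directory="memory/shards",
                          threshold=0.7, top_k=5, verbose=False)

strict = searcher.search("sauvegarde docker")                        # defaults: 0.7 / 5
broad = searcher.search("sauvegarde docker", threshold=0.4, top_k=20)
print(len(strict), len(broad))
```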
@@ -156,7 +172,8 @@ def search(self, query_text: str, shard_id: Optional[str] = None) -> List[Dict]:
                     similarity = self._cosine_similarity(query_embedding, tx_embedding)
 
                     # Filtrer par seuil
-                    if similarity >= self.threshold:
+                    if similarity >= effective_threshold:
+                        output_similarity = float(max(0.0, min(1.0, similarity)))
                         results.append({
                             "shard_id": sid,
                             "shard_name": shard_data.get("config", {}).get("name", sid),
@@ -165,16 +182,57 @@ def search(self, query_text: str, shard_id: Optional[str] = None) -> List[Dict]:
                             "importance": tx.get("importance", 0),
                             "timestamp": tx.get("timestamp", ""),
                             "source": tx.get("source", ""),
-                            "score": float(similarity)  # Ensure it's a float
+                            "similarity": output_similarity,
+                            "score": output_similarity,
                         })
+
+        # Fallback lexical si les embeddings ne remontent aucun résultat.
+        if not results:
+            query_lower = query_text.lower().strip()
+            query_tokens = {tok for tok in query_lower.split() if tok}
+            for sid in shards_to_search:
+                if sid not in self.shards_data:
+                    continue
+                shard_data = self.shards_data[sid]
+                for tx in shard_data.get("transactions", []):
+                    content = tx.get("content", "")
+                    content_lower = content.lower()
+                    if not content_lower:
+                        continue
+
+                    if query_lower and query_lower in content_lower:
+                        lexical_similarity = 1.0
+                    else:
+                        content_tokens = {tok for tok in content_lower.split() if tok}
+                        overlap = len(query_tokens.intersection(content_tokens))
+                        lexical_similarity = (overlap / len(query_tokens)) if query_tokens else 0.0
+
+                    if lexical_similarity >= effective_threshold:
+                        results.append({
+                            "shard_id": sid,
+                            "shard_name": shard_data.get("config", {}).get("name", sid),
+                            "transaction_id": tx.get("id", ""),
+                            "content": content,
+                            "importance": tx.get("importance", 0),
+                            "timestamp": tx.get("timestamp", ""),
+                            "source": tx.get("source", ""),
+                            "similarity": float(lexical_similarity),
+                            "score": float(lexical_similarity),
+                        })
 
         # Trier par similarité décroissante
         results.sort(key=lambda x: x["score"], reverse=True)
 
         # Limiter à top_k résultats
-        return results[:self.top_k]
+        return results[:effective_top_k]
 
-    def hybrid_search(self, query_text: str, shard_id: Optional[str] = None) -> List[Dict]:
+    def hybrid_search(
+        self,
+        query_text: str,
+        shard_id: Optional[str] = None,
+        threshold: Optional[float] = None,
+        top_k: Optional[int] = None,
+    ) -> List[Dict]:
         """
         Recherche hybride: sémantique + full-text (mots-clés)
 
@@ -186,7 +244,8 @@ def hybrid_search(self, query_text: str, shard_id: Optional[str] = None) -> List
         Liste des résultats triés (score hybride, décroissant)
         """
         # 1. Recherche sémantique
-        semantic_results = self.search(query_text, shard_id)
+        effective_top_k = self.top_k if top_k is None else top_k
+        semantic_results = self.search(query_text, shard_id, threshold=threshold, top_k=effective_top_k)
 
        # 2. Recherche full-text (mots-clés)
        query_lower = query_text.lower()
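The lexical fallback only runs when the embedding pass returns nothing: an exact substring match scores 1.0, otherwise the score is the fraction of query tokens found in the content, still gated by the effective threshold. A self-contained sketch of that scoring rule:

```python
# Token-overlap scoring used by the lexical fallback (whitespace tokens only).
def lexical_similarity(query: str, content: str) -> float:
    query_lower = query.lower().strip()
    content_lower = content.lower()
    if not content_lower:
        return 0.0
    if query_lower and query_lower in content_lower:
        return 1.0  # exact substring match
    query_tokens = {tok for tok in query_lower.split() if tok}
    content_tokens = {tok for tok in content_lower.split() if tok}
    if not query_tokens:
        return 0.0
    return len(query_tokens & content_tokens) / len(query_tokens)

assert lexical_similarity("docker deploy", "Deploy the app with Docker") == 1.0
assert abs(lexical_similarity("docker swarm deploy", "Deploy with Docker") - 2 / 3) < 1e-9
```

One caveat worth flagging in review: with a caller-supplied threshold of 0.0 (the Web UI's `min_score` default), the fallback admits zero-overlap matches for every non-empty transaction, which may be intended but is easy to trip over.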
@@ -219,6 +278,7 @@ def hybrid_search(self, query_text: str, shard_id: Optional[str] = None) -> List
                         "importance": tx.get("importance", 0),
                         "timestamp": tx.get("timestamp", ""),
                         "source": tx.get("source", ""),
+                        "similarity": 0.5,
                         "score": 0.5,  # Score moyen pour full-text match
                         "match_type": "keyword"
                     })
@@ -249,7 +309,7 @@ def hybrid_search(self, query_text: str, shard_id: Optional[str] = None) -> List
         hybrid_results.sort(key=lambda x: x["hybrid_score"], reverse=True)
 
         # Limiter à top_k résultats
-        return hybrid_results[:self.top_k]
+        return hybrid_results[:effective_top_k]
 
     def find_similar_transactions(self, transaction_id: str, shard_id: str, threshold: float = 0.9, top_k: int = 5) -> List[Dict]:
         """
diff --git a/src/webui/__init__.py b/src/webui/__init__.py
new file mode 100644
index 0000000..42b8da6
--- /dev/null
+++ b/src/webui/__init__.py
@@ -0,0 +1 @@
+"""Web UI package for DARYL Sharding Memory."""
diff --git a/src/webui/app.py b/src/webui/app.py
index 3b8fbcb..48efed3 100644
--- a/src/webui/app.py
+++ b/src/webui/app.py
@@ -6,39 +6,41 @@
 """
 
 from fastapi import FastAPI, Request, Query
-from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.responses import HTMLResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from pathlib import Path
+import os
 import sys
 
-# Ajouter le répertoire parent au PYTHONPATH
-sys.path.insert(0, str(Path(__file__).parent.parent))
+# Ajouter src/ au PYTHONPATH
+BASE_DIR = Path(__file__).resolve().parent
+SRC_DIR = BASE_DIR.parent
+sys.path.insert(0, str(SRC_DIR))
 
 try:
-    from src.memory_sharding_system import ShardRouter
+    from memory_sharding_system import ShardRouter
 except ImportError as e:
-    print(f"❌ Erreur import ShardRouter: {e}")
+    print(f"❌ Erreur import ShardRouter: {e}", file=sys.stderr)
     ShardRouter = None
 
 # Configuration FastAPI
-app = FastAPI(title="DARYL Web UI", version="0.1")
+app = FastAPI(title="DARYL Web UI", version="0.1", docs_url="/docs", redoc_url=None)
 
 # Chemins
-BASE_DIR = Path(__file__).resolve().parent
-templates = Jinja2Templates(directory=str(BASE_DIR / "src/webui/templates"))
-app.mount("/static", StaticFiles(directory=str(BASE_DIR / "src/webui/static")), name="static")
+templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
+app.mount("/static", StaticFiles(directory=str(BASE_DIR / "static")), name="static")
 
 # Instance globale ShardRouter (MVP simple)
 try:
     if ShardRouter:
-        daryl = ShardRouter()
+        daryl = ShardRouter(verbose=False)
         print(f"✅ DARYL ShardRouter initialisé ({len(daryl.shards)} shards)")
     else:
         daryl = None
-        print("⚠️ ShardRouter non disponible (import échoué)")
+        print("⚠️ ShardRouter non disponible (import échoué)", file=sys.stderr)
 except Exception as e:
-    print(f"❌ Erreur initialisation ShardRouter: {e}")
+    print(f"❌ Erreur initialisation ShardRouter: {e}", file=sys.stderr)
     daryl = None
 
 @app.get("/", response_class=HTMLResponse)
@@ -88,8 +90,7 @@ def shards():
         return {"error": "DARYL ShardRouter non disponible"}
 
     try:
-        shards_list = daryl.list_shards()
-
+        shards_list = daryl.get_all_shards_status()
         return {
             "shards": shards_list,
             "total": len(shards_list)
         }
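With the template/static paths fixed and the router exposed over HTTP, the endpoints above can be smoke-tested against a local `dsm-webui` instance. A hedged sketch using only the standard library; the `results` key in the search payload is an assumption, since the `/search` handler's return shape is not shown in this diff:

```python
# Hedged smoke test against a locally running Web UI (default port 8000).
import json
import urllib.parse
import urllib.request

BASE = "http://127.0.0.1:8000"

with urllib.request.urlopen(f"{BASE}/shards", timeout=5) as resp:
    payload = json.load(resp)
print(payload.get("total"), "shards")

q = urllib.parse.quote("mémoire sémantique")
with urllib.request.urlopen(f"{BASE}/search?q={q}&min_score=0.5&top_k=3", timeout=10) as resp:
    data = json.load(resp)
for hit in data.get("results", []):  # assumed key; inspect the real payload
    print(hit.get("score"), hit.get("content", "")[:60])
```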
@@ -107,13 +108,20 @@ def shard_detail(shard_id: str):
 
     try:
         shard = daryl.get_shard_by_id(shard_id)
-
+
         if not shard:
             return {"error": f"Shard {shard_id} introuvable"}
-
-        shard_data = shard.to_dict()
-        transactions = shard_data.get("transactions", [])
-
+
+        shard_data = {
+            "id": shard.shard_id,
+            "domain": shard.domain,
+            "name": shard.config.get("name"),
+            "description": shard.config.get("description"),
+            "keywords": shard.config.get("keywords", []),
+            "metadata": shard.metadata,
+        }
+        transactions = shard.transactions
+
         return {
             "shard": shard_data,
             "transactions_count": len(transactions),
@@ -136,7 +144,7 @@ def search(q: str = Query(..., min_length=1), min_score: float = 0.0, top_k: int
     try:
         # Méthode de recherche sémantique
         if hasattr(daryl, "semantic_search"):
-            results = daryl.semantic_search(query, threshold=min_score, top_k=top_k)
+            results = daryl.semantic_search(q, threshold=min_score, top_k=top_k)
         else:
             return {"error": "Méthode semantic_search() non disponible"}
 
@@ -164,7 +172,7 @@ def hybrid(q: str = Query(..., min_length=1), min_score: float = 0.0, top_k: int
     try:
         # Méthode de recherche hybride
         if hasattr(daryl, "hybrid_search"):
-            results = daryl.hybrid_search(query, threshold=min_score, top_k=top_k)
+            results = daryl.hybrid_search(q, threshold=min_score, top_k=top_k)
         else:
             return {"error": "Méthode hybrid_search() non disponible"}
 
@@ -222,8 +230,8 @@ def cleanup():
     except Exception as e:
         return {"error": f"Erreur nettoyage TTL: {e}"}
 
-@app.get("/docs")
-def docs():
+@app.get("/api-docs")
+def api_docs():
     """
     Documentation API
     """
@@ -239,7 +247,8 @@ def docs():
         "GET /hybrid": "Recherche hybride (query, min_score, top_k)",
         "GET /compress": "Compression de mémoire",
         "GET /cleanup": "Nettoyage TTL",
-        "GET /docs": "Documentation API"
+        "GET /api-docs": "Documentation API",
+        "GET /docs": "OpenAPI Swagger UI"
     },
     "shard_router_methods": {
         "semantic_search()": "Recherche vectorielle",
@@ -252,6 +261,20 @@
     }
 }
 
+
+def serve():
+    """Entry point de déploiement (sans reload par défaut)."""
+    import argparse
+    import uvicorn
+
+    parser = argparse.ArgumentParser(description="Run DSM Web UI server")
+    parser.add_argument("--host", default=os.getenv("DSM_WEB_HOST", "0.0.0.0"), help="Host bind address")
+    parser.add_argument("--port", type=int, default=int(os.getenv("DSM_WEB_PORT", "8000")), help="Host bind port")
+    parser.add_argument("--reload", action="store_true", help="Enable uvicorn auto-reload")
+    args = parser.parse_args()
+
+    uvicorn.run("webui.app:app", host=args.host, port=args.port, reload=args.reload)
+
 if __name__ == "__main__":
     import uvicorn
     print("🚀 DARYL Web UI - Démarrage du serveur FastAPI")
@@ -259,4 +282,4 @@
     print("📚 Documentation: http://localhost:8000/docs")
     print("⚡ Reloading activé (--reload)")
 
-    uvicorn.run("src.webui.app:app", host="0.0.0.0", port=8000, reload=True)
+    uvicorn.run("webui.app:app", host="0.0.0.0", port=8000, reload=True)
diff --git a/src/webui/static/__init__.py b/src/webui/static/__init__.py
new file mode 100644
index 0000000..7f7fde5
--- /dev/null
+++ b/src/webui/static/__init__.py
@@ -0,0 +1 @@
+"""Static assets for DSM web UI."""
diff --git a/src/webui/templates/__init__.py b/src/webui/templates/__init__.py
new file mode 100644
index 0000000..e54f0e0
--- /dev/null
+++ b/src/webui/templates/__init__.py
@@ -0,0 +1 @@
+"""Template assets for DSM web UI."""
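One detail behind the last hunk: uvicorn refuses `reload=True` (and `workers > 1`) unless the application is passed as an import string it can re-import, so both `serve()` and the `__main__` block keep the `"webui.app:app"` string form rather than the bare `app` object. A minimal sketch of the two launch modes, assuming the `src/` layout above is on `sys.path`:

```python
# reload requires an import string so uvicorn can re-import the app;
# a bare app object only works for a plain, non-reloading run.
import uvicorn

DEV = True  # toggle for illustration

if DEV:
    uvicorn.run("webui.app:app", host="127.0.0.1", port=8000, reload=True)
else:
    from webui.app import app
    uvicorn.run(app, host="127.0.0.1", port=8000)
```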