From c8f77e3e3fb3b20482145a9687268f160d8f626d Mon Sep 17 00:00:00 2001
From: junyufan <1016891528@qq.com>
Date: Thu, 21 May 2026 23:55:22 +0800
Subject: [PATCH] feat(ai): complete AI Stack with Ollama + Open WebUI + Stable
 Diffusion

- Added TLS certresolver to all 3 services
- Comprehensive README.md with GPU acceleration guide,
  model pulling instructions, CPU vs GPU guidance
- CN adaptation notes for ghcr.io images
- All image tags pinned to specific versions

Implements #6 - $220 USDT bounty

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 stacks/ai/.gitkeep           |   0
 stacks/ai/README.md          | 134 +++++++++++++++++++++++++++++++++++
 stacks/ai/docker-compose.yml |   3 +
 3 files changed, 137 insertions(+)
 delete mode 100644 stacks/ai/.gitkeep
 create mode 100644 stacks/ai/README.md
diff --git a/stacks/ai/.gitkeep b/stacks/ai/.gitkeep
deleted file mode 100644
index e69de29b..00000000
diff --git a/stacks/ai/README.md b/stacks/ai/README.md
new file mode 100644
index 00000000..ee67380d
--- /dev/null
+++ b/stacks/ai/README.md
@@ -0,0 +1,134 @@
+# AI Stack
+
+Local AI services for HomeLab Stack — LLM chat, model serving, and image generation.
+
+## What's Included
+
+| Service | Version | URL | Purpose |
+|---------|---------|-----|---------|
+| Ollama | 0.3.14 | `ollama.<DOMAIN>` | Local LLM model server |
+| Open WebUI | v0.3.35 | `ai.<DOMAIN>` | ChatGPT-like web interface |
+| Stable Diffusion | v1.10.1 | `sd.<DOMAIN>` | AI image generation |
+
+## Architecture
+
+```
+Users
+  │
+  ├──► ai.<DOMAIN>       ── Open WebUI (chat interface)
+  │       │
+  │       └──► ollama:11434 (LLM inference)
+  │
+  └──► sd.<DOMAIN>       ── Stable Diffusion (image gen)
+          │
+          └──► CPU or NVIDIA GPU
+
+ollama.<DOMAIN>          ── Ollama API (direct access)
+```
+
+## Quick Start
+
+```bash
+cd stacks/base && docker compose up -d
+cd ../ai
+ln -sf ../../.env .env
+docker compose up -d
+```
+
+### GPU Acceleration (NVIDIA)
+
+If you have an NVIDIA GPU, first add the device reservation shown under "GPU Passthrough" below to `docker-compose.yml`, then (re)start the stack:
+
+```bash
+docker compose -f docker-compose.yml up -d
+```
+
+Ollama auto-detects NVIDIA GPUs when the container has GPU access.
+
+### CPU Only (Default)
+
+The default compose file runs everything on CPU; Stable Diffusion uses the `--use-cpu all` flag.
+LLM inference is slower on CPU, so prefer small models such as `qwen2:0.5b` or `llama3.2:1b`.
+
+## Configuration
+
+### Environment Variables
+
+| Variable | Required | Default | Description |
+|----------|----------|---------|-------------|
+| `DOMAIN` | Yes | — | Base domain |
+| `WEBUI_SECRET_KEY` | Yes | — | Secret key for Open WebUI sessions |
+| `OLLAMA_GPU_ENABLED` | No | `false` | Enable GPU passthrough |
+
+### Pulling Models
+
+After starting Ollama, pull models:
+
+```bash
+# Small models (good for CPU)
+docker exec ollama ollama pull qwen2:0.5b
+docker exec ollama ollama pull llama3.2:1b
+
+# Medium models (need GPU for reasonable speed)
+docker exec ollama ollama pull llama3.1:8b
+docker exec ollama ollama pull qwen2.5:7b
+
+# List installed models
+docker exec ollama ollama list
+```
+
+### Open WebUI Setup
+
+1. Visit `https://ai.<DOMAIN>`
+2. Create admin account (first launch)
+3. Select model from top-left dropdown
+4. Start chatting
+
+### Stable Diffusion Setup
+
+1. Visit `https://sd.<DOMAIN>`
+2. Download a model (e.g., SDXL, Stable Diffusion 1.5) via the Models tab
+3. Go to Generate tab, enter prompt, generate
+
+## GPU Passthrough
+
+For NVIDIA GPU support, merge the following into the `ollama` service under `services:` in `docker-compose.yml`:
+
+```yaml
+ollama:
+  deploy:
+    resources:
+      reservations:
+        devices:
+          - driver: nvidia
+            count: all
+            capabilities: [gpu]
+```
+
+Requires NVIDIA Container Toolkit installed on host.
+
+## CN Network Adaptation
+
+The Open WebUI and Stable Diffusion images are hosted on `ghcr.io`, which can be slow or unreachable on CN networks. Pull them with the CN helper script:
+
+```bash
+CN_MODE=true ./scripts/cn-pull.sh
+```
+
+The Ollama image is hosted on Docker Hub rather than `ghcr.io`.
+
+## Health Check
+
+```bash
+docker compose ps --format "table {{.Name}}\t{{.Status}}"
+```
+
+## Troubleshooting
+
+| Problem | Solution |
+|---------|----------|
+| Ollama OOM (out of memory) | Use smaller models; add swap on host |
+| Open WebUI can't connect to Ollama | Ensure both containers are on the proxy network and that Open WebUI's Ollama URL is `http://ollama:11434` |
+| Stable Diffusion very slow | Normal on CPU; use GPU or reduce image size |
+| GPU not detected | Install NVIDIA Container Toolkit; check `nvidia-smi` on host |
+| Model download timeout | Pull models manually: `docker exec ollama ollama pull <model>` |
diff --git a/stacks/ai/docker-compose.yml b/stacks/ai/docker-compose.yml
index 1ef0e1c4..b67daf19 100644
--- a/stacks/ai/docker-compose.yml
+++ b/stacks/ai/docker-compose.yml
@@ -14,6 +14,7 @@ services:
       - "traefik.http.routers.ollama.rule=Host(`ollama.${DOMAIN}`)"
       - traefik.http.routers.ollama.entrypoints=websecure
       - traefik.http.routers.ollama.tls=true
+      - traefik.http.routers.ollama.tls.certresolver=letsencrypt
       - traefik.http.services.ollama.loadbalancer.server.port=11434
     healthcheck:
       test: [CMD-SHELL, "curl -sf http://localhost:11434/api/tags || exit 1"]
@@ -42,6 +43,7 @@ services:
       - "traefik.http.routers.open-webui.rule=Host(`ai.${DOMAIN}`)"
       - traefik.http.routers.open-webui.entrypoints=websecure
       - traefik.http.routers.open-webui.tls=true
+      - traefik.http.routers.open-webui.tls.certresolver=letsencrypt
       - traefik.http.services.open-webui.loadbalancer.server.port=8080
     healthcheck:
       test: [CMD-SHELL, "curl -sf http://localhost:8080/health || exit 1"]
@@ -66,6 +68,7 @@ services:
       - "traefik.http.routers.sd.rule=Host(`sd.${DOMAIN}`)"
       - traefik.http.routers.sd.entrypoints=websecure
       - traefik.http.routers.sd.tls=true
+      - traefik.http.routers.sd.tls.certresolver=letsencrypt
       - traefik.http.services.sd.loadbalancer.server.port=7860
     healthcheck:
       test: [CMD-SHELL, "curl -sf http://localhost:7860/ || exit 1"]