From 17204f2ff5969a9693b5e6a8477cff2853c13eff Mon Sep 17 00:00:00 2001 From: DK09876 Date: Mon, 11 May 2026 14:24:04 -0700 Subject: [PATCH 1/2] feat: add Claude Agent SDK + Hindsight memory notebook Interactive notebook demonstrating: - Explicit memory tools (retain, recall, reflect) via MCP server - Automatic memory hooks (auto-recall, auto-retain) - Knowledge compounding across sessions Co-Authored-By: Claude Opus 4.6 --- notebooks/claude-agent-sdk.ipynb | 429 +++++++++++++++++++++++++++++++ 1 file changed, 429 insertions(+) create mode 100644 notebooks/claude-agent-sdk.ipynb diff --git a/notebooks/claude-agent-sdk.ipynb b/notebooks/claude-agent-sdk.ipynb new file mode 100644 index 0000000..f4cb97f --- /dev/null +++ b/notebooks/claude-agent-sdk.ipynb @@ -0,0 +1,429 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Claude Agent SDK with Persistent Memory\n", + "\n", + "Build a Claude agent that remembers across sessions using Hindsight memory tools and automatic hooks.\n", + "\n", + "## Features\n", + "- In-process MCP server with retain, recall, and reflect tools\n", + "- Automatic memory hooks that inject context before each prompt\n", + "- Auto-retain agent results for future sessions\n", + "- Knowledge that compounds over repeated runs\n", + "\n", + "## Prerequisites\n", + "- **Claude Code CLI** installed and authenticated:\n", + " ```bash\n", + " npm install -g @anthropic-ai/claude-code\n", + " claude auth login # or set ANTHROPIC_API_KEY below\n", + " ```\n", + "- An LLM API key for Hindsight (OpenAI, Gemini, etc.)\n", + "- Hindsight running locally via Docker (see setup below)\n", + "- Alternatively, a [Hindsight Cloud](https://ui.hindsight.vectorize.io/signup) account (no Docker needed)\n", + "\n", + "> **Note:** The Claude Agent SDK runs the Claude Code CLI as a subprocess. You need either `claude auth login` or `ANTHROPIC_API_KEY` set in your environment. 
Just having an API key without the CLI installed will not work.\n", + "\n", + "## Start Hindsight Locally\n", + "\n", + "Before running this notebook, start Hindsight in a terminal:\n", + "\n", + "```bash\n", + "export LLM_API_KEY=\"your-llm-api-key\"\n", + "\n", + "docker run --rm -it --pull always -p 8888:8888 -p 9999:9999 \\\n", + " -e HINDSIGHT_API_LLM_API_KEY=$LLM_API_KEY \\\n", + " -e HINDSIGHT_API_LLM_MODEL=gpt-4o-mini \\\n", + " -v $HOME/.hindsight-docker:/home/hindsight/.pg0 \\\n", + " ghcr.io/vectorize-io/hindsight:latest\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "## 1. Install Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -q hindsight-claude-agent-sdk nest-asyncio" + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "## 2. Configure Environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()\n", + "\n", + "import os\n", + "import getpass\n", + "\n", + "# Set your Anthropic API key (needed for Claude Agent SDK)\n", + "if not os.environ.get(\"ANTHROPIC_API_KEY\"):\n", + " os.environ[\"ANTHROPIC_API_KEY\"] = getpass.getpass(\"Enter your Anthropic API key: \")\n", + "\n", + "# Hindsight connection (defaults to local self-hosted instance)\n", + "HINDSIGHT_API_URL = os.getenv(\"HINDSIGHT_API_URL\", \"http://localhost:8888\")\n", + "HINDSIGHT_API_KEY = os.getenv(\"HINDSIGHT_API_KEY\", None)\n", + "BANK_ID = \"claude-agent-demo\"\n", + "\n", + "print(f\"Hindsight API: {HINDSIGHT_API_URL}\")\n", + "print(f\"Bank ID: {BANK_ID}\")" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "## 3. 
Create a Memory Bank" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "from hindsight_client import Hindsight\n", + "\n", + "hindsight = Hindsight(base_url=HINDSIGHT_API_URL, api_key=HINDSIGHT_API_KEY)\n", + "\n", + "# Create a dedicated bank for this demo (safe to re-run)\n", + "try:\n", + " hindsight.create_bank(\n", + " bank_id=BANK_ID,\n", + " name=\"Claude Agent Demo\",\n", + " mission=\"Remember user preferences, decisions, and project context for a software development assistant.\",\n", + " )\n", + " print(f\"Bank '{BANK_ID}' created.\")\n", + "except Exception:\n", + " print(f\"Bank '{BANK_ID}' already exists, continuing.\")" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "## 4. Set Up Memory Tools\n", + "\n", + "Create an in-process MCP server with retain, recall, and reflect tools. (This cell only demonstrates the call; the agent runs from section 5 onward happen in a subprocess that builds its own server with the same settings.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "from hindsight_claude_agent_sdk import create_hindsight_server\n", + "\n", + "server = create_hindsight_server(\n", + " bank_id=BANK_ID,\n", + " hindsight_api_url=HINDSIGHT_API_URL,\n", + " api_key=HINDSIGHT_API_KEY,\n", + " tags=[\"source:claude-agent-sdk-demo\"],\n", + ")\n", + "\n", + "print(\"Hindsight MCP server created with tools: hindsight_retain, hindsight_recall, hindsight_reflect\")" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "## 5.
Run Agent with Explicit Memory Tools\n", + "\n", + "The agent decides when to store and retrieve memories:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "import subprocess\n", + "import sys\n", + "import tempfile\n", + "import time\n", + "import os\n", + "\n", + "def run_agent(prompt: str, system: str = None, use_hooks: bool = False, retries: int = 3):\n", + " \"\"\"Run a Claude agent in a subprocess (Claude Agent SDK requires asyncio.run).\"\"\"\n", + " lines = []\n", + " lines.append(\"import asyncio\")\n", + " lines.append(\"from claude_agent_sdk import query, ClaudeAgentOptions\")\n", + " lines.append(\"from hindsight_claude_agent_sdk import create_hindsight_server\")\n", + " lines.append(\"\")\n", + " lines.append(\"server = create_hindsight_server(\")\n", + " lines.append(f\" bank_id={BANK_ID!r},\")\n", + " lines.append(f\" hindsight_api_url={HINDSIGHT_API_URL!r},\")\n", + " if HINDSIGHT_API_KEY:\n", + " lines.append(f\" api_key={HINDSIGHT_API_KEY!r},\")\n", + " lines.append(' tags=[\"source:claude-agent-sdk-demo\"],')\n", + " lines.append(\")\")\n", + " lines.append(\"\")\n", + "\n", + " if use_hooks:\n", + " lines.append(\"from hindsight_claude_agent_sdk import create_memory_hooks, MemoryHookConfig\")\n", + " lines.append(\"hooks = create_memory_hooks(\")\n", + " lines.append(f\" bank_id={BANK_ID!r},\")\n", + " lines.append(f\" hindsight_api_url={HINDSIGHT_API_URL!r},\")\n", + " if HINDSIGHT_API_KEY:\n", + " lines.append(f\" api_key={HINDSIGHT_API_KEY!r},\")\n", + " lines.append(\" hook_config=MemoryHookConfig(\")\n", + " lines.append(\" auto_recall=True,\")\n", + " lines.append(\" auto_retain=True,\")\n", + " lines.append(\" recall_max_results=5,\")\n", + " lines.append(\" ),\")\n", + " lines.append(\")\")\n", + " lines.append(\"\")\n", + "\n", + " lines.append(\"async def main():\")\n", + " lines.append(\" options = ClaudeAgentOptions(\")\n", + " lines.append(' 
mcp_servers={\"hindsight\": server},')\n", + " lines.append(' allowed_tools=[\"mcp__hindsight__*\"],')\n", + " lines.append(' model=\"sonnet\",')\n", + " lines.append(' permission_mode=\"bypassPermissions\",')\n", + " lines.append(\" )\")\n", + "\n", + " if system:\n", + " lines.append(f\" options.system_prompt = {system!r}\")\n", + " elif use_hooks:\n", + " lines.append(' options.system_prompt = \"You are a helpful coding assistant.\"')\n", + "\n", + " if use_hooks:\n", + " lines.append(\" options.hooks = hooks\")\n", + "\n", + " lines.append(\" result_text = None\")\n", + " lines.append(f\" async for msg in query(prompt={prompt!r}, options=options):\")\n", + " lines.append(\" if hasattr(msg, 'result'):\")\n", + " lines.append(\" result_text = msg.result\")\n", + " lines.append(\" print(result_text or '')\")\n", + " lines.append(\"\")\n", + " lines.append(\"asyncio.run(main())\")\n", + "\n", + " script = \"\\n\".join(lines)\n", + "\n", + " with tempfile.NamedTemporaryFile(mode=\"w\", suffix=\".py\", delete=False) as f:\n", + " f.write(script)\n", + " tmp_path = f.name\n", + "\n", + " try:\n", + " for attempt in range(retries):\n", + " result = subprocess.run(\n", + " [sys.executable, tmp_path],\n", + " capture_output=True, text=True, timeout=300,\n", + " env={**os.environ},\n", + " )\n", + " if result.returncode == 0:\n", + " return result.stdout.strip()\n", + " # Rate limited — wait and retry\n", + " wait = 30 * (attempt + 1)\n", + " print(f\"Attempt {attempt + 1}/{retries} failed, retrying in {wait}s...\")\n", + " time.sleep(wait)\n", + "\n", + " print(\"STDERR:\", result.stderr[-1000:])\n", + " return result.stdout.strip()\n", + " finally:\n", + " os.unlink(tmp_path)\n", + "\n", + "\n", + "# Store some preferences\n", + "result = run_agent(\n", + " \"Store the following into memory using the retain tool:\\n\"\n", + " \"- I prefer Python with type hints and async/await patterns\\n\"\n", + " \"- My team uses pytest for testing with pytest-asyncio\\n\"\n", + " \"- 
We follow conventional commits (feat:, fix:, chore:)\\n\"\n", + " \"- Our API framework is FastAPI with Pydantic v2 models\"\n", + ")\n", + "print(\"Agent result:\", result)" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "## 6. Recall Memories in a New Session\n", + "\n", + "Simulate a fresh session — the agent has no conversation history, but can recall from memory:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "# New session — no prior context\n", + "result = run_agent(\n", + " \"What testing framework does my team use? \"\n", + " \"Search your memory first before answering.\",\n", + " system=\"You are a helpful coding assistant. Always check memory before answering questions about the user.\",\n", + ")\n", + "print(\"Agent result:\", result)" + ] + }, + { + "cell_type": "markdown", + "id": "13", + "metadata": {}, + "source": [ + "## 7. Reflect for Deeper Synthesis\n", + "\n", + "Use reflect when you need reasoned analysis across all stored memories:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "result = run_agent(\n", + " \"Use the reflect tool to synthesize everything you know about my development stack and preferences.\",\n", + ")\n", + "print(\"Agent result:\", result)" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "## 8. Run Agent with Automatic Memory Hooks\n", + "\n", + "Hooks inject memory automatically — no explicit tool calls needed. 
The agent gets relevant memories injected as system context before each prompt, and its results are auto-retained for future sessions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "# The agent receives past memories automatically via hooks — no tool call needed\n", + "result = run_agent(\n", + " \"What patterns should I follow when writing pytest tests for my FastAPI endpoints?\",\n", + " use_hooks=True,\n", + ")\n", + "print(\"Agent result:\", result)" + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": {}, + "source": [ + "The agent received your team's testing preferences via auto-recall before it even started working. And its result was auto-retained for future sessions." + ] + }, + { + "cell_type": "markdown", + "id": "18", + "metadata": {}, + "source": [ + "## 9. Run Again to See Knowledge Compound\n", + "\n", + "Each session adds to the knowledge base. Run the agent again with a related prompt:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "result = run_agent(\n", + " \"What commit message format should I use for this test file I just created?\",\n", + " use_hooks=True,\n", + ")\n", + "print(\"Agent result:\", result)" + ] + }, + { + "cell_type": "markdown", + "id": "20", + "metadata": {}, + "source": [ + "The agent recalls the conventional commits preference from earlier — even though it was stored in a completely different session." 
+ ] + }, + { + "cell_type": "markdown", + "id": "21", + "metadata": {}, + "source": [ + "## Cleanup\n", + "\n", + "Delete the bank created during this notebook:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [ + "hindsight.delete_bank(bank_id=BANK_ID)\n", + "print(f\"Deleted bank '{BANK_ID}'.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "3.11.5", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From d92ccab373170f01eca36e137cd3b0b5edca8118 Mon Sep 17 00:00:00 2001 From: DK09876 Date: Tue, 2 Jun 2026 15:17:43 -0700 Subject: [PATCH 2/2] cookbook(claude-agent-sdk): sharpen cells 7 + 9 prompts so demos surface all stored facts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit finding (2026-06-02): cell 7 (Reflect) surfaced only Python/async out of 4 stored facts (Python+async, pytest, FastAPI, conventional-commits), and cell 9 (auto-recall hook) failed to surface conventional-commits despite the cell's markdown explicitly claiming it would. Not a Pattern-1 LLM-tool-choice issue (Claude doesn't typically have that quirk on explicit recall instructions) — both come down to recall/reflect query specificity: - Cell 7's "synthesize everything you know about my development stack and preferences" is broad enough that the server's reflect synthesis ranked Python/async highest and dropped the others. Sharpening the prompt to name the four categories explicitly forces reflect to cover each one (or say so when a category has none) — much more reliable. - Cell 9's "What commit message format should I use for this test file I just created?" 
is dominated semantically by "test file"; the auto-recall hook uses the raw user prompt as its query, so the test-related memories outrank the conventional-commits one. Prepending "Recall what you know about my commit conventions, then answer:" steers the recall query toward the right memory while still asking the same underlying question. Both functional edits live entirely in the notebook prompts — the integration code (create_memory_hooks, hindsight_reflect) is correct and unchanged. The remaining hunks are serialization churn only: literal em dashes re-encoded as \u2014 escapes, and the file's trailing newline dropped. The recall_max_results=5 value the notebook passes in MemoryHookConfig is plenty; the issue is query semantics. Co-Authored-By: Claude Opus 4.7 (1M context) --- notebooks/claude-agent-sdk.ipynb | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/notebooks/claude-agent-sdk.ipynb b/notebooks/claude-agent-sdk.ipynb index f4cb97f..36581f3 100644 --- a/notebooks/claude-agent-sdk.ipynb +++ b/notebooks/claude-agent-sdk.ipynb @@ -247,7 +247,7 @@ " )\n", " if result.returncode == 0:\n", " return result.stdout.strip()\n", - " # Rate limited — wait and retry\n", + " # Rate limited \u2014 wait and retry\n", " wait = 30 * (attempt + 1)\n", " print(f\"Attempt {attempt + 1}/{retries} failed, retrying in {wait}s...\")\n", " time.sleep(wait)\n", @@ -276,7 +276,7 @@ "source": [ "## 6. Recall Memories in a New Session\n", "\n", - "Simulate a fresh session — the agent has no conversation history, but can recall from memory:" + "Simulate a fresh session \u2014 the agent has no conversation history, but can recall from memory:" ] }, { @@ -286,7 +286,7 @@ "metadata": {}, "outputs": [], "source": [ - "# New session — no prior context\n", + "# New session \u2014 no prior context\n", "result = run_agent(\n", " \"What testing framework does my team use?
\"\n", " \"Search your memory first before answering.\",\n", @@ -313,7 +313,11 @@ "outputs": [], "source": [ "result = run_agent(\n", - " \"Use the reflect tool to synthesize everything you know about my development stack and preferences.\",\n", + " (\"Use the reflect tool to synthesize what you know about me, with explicit \"\n", + " \"attention to: programming languages and patterns, testing frameworks, \"\n", + " \"API/framework choices, and commit conventions. Do not skip any of these \"\n", + " \"categories \u2014 call out each one you have memories about, and say so if a \"\n", + " \"category has none.\"),\n", ")\n", "print(\"Agent result:\", result)" ] @@ -325,7 +329,7 @@ "source": [ "## 8. Run Agent with Automatic Memory Hooks\n", "\n", - "Hooks inject memory automatically — no explicit tool calls needed. The agent gets relevant memories injected as system context before each prompt, and its results are auto-retained for future sessions:" + "Hooks inject memory automatically \u2014 no explicit tool calls needed. 
The agent gets relevant memories injected as system context before each prompt, and its results are auto-retained for future sessions:" ] }, { @@ -335,7 +339,7 @@ "metadata": {}, "outputs": [], "source": [ - "# The agent receives past memories automatically via hooks — no tool call needed\n", + "# The agent receives past memories automatically via hooks \u2014 no tool call needed\n", "result = run_agent(\n", " \"What patterns should I follow when writing pytest tests for my FastAPI endpoints?\",\n", " use_hooks=True,\n", @@ -369,7 +373,8 @@ "outputs": [], "source": [ "result = run_agent(\n", - " \"What commit message format should I use for this test file I just created?\",\n", + " (\"Recall what you know about my commit conventions, then answer: what \"\n", + " \"commit message format should I use for this test file I just created?\"),\n", " use_hooks=True,\n", ")\n", "print(\"Agent result:\", result)" @@ -380,7 +385,7 @@ "id": "20", "metadata": {}, "source": [ - "The agent recalls the conventional commits preference from earlier — even though it was stored in a completely different session." + "The agent recalls the conventional commits preference from earlier \u2014 even though it was stored in a completely different session." ] }, { @@ -426,4 +431,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file