diff --git a/agents/data-analyst-session.ts b/agents/data-analyst-session.ts new file mode 100644 index 00000000..fe8bfb9b --- /dev/null +++ b/agents/data-analyst-session.ts @@ -0,0 +1,147 @@ +/** + * RUNTIME — run on every invocation. + * + * Usage: + * DATA_ANALYST_AGENT_ID= \ + * DATA_ANALYST_ENVIRONMENT_ID= \ + * npx ts-node agents/data-analyst-session.ts "What are the top 5 events by volume?" + * + * Required env vars (from data-analyst-setup.ts output): + * ANTHROPIC_API_KEY + * DATA_ANALYST_AGENT_ID + * DATA_ANALYST_ENVIRONMENT_ID + * + * Optional: + * DATA_ANALYST_AGENT_VERSION — pin to a specific agent version + * DATA_ANALYST_VAULT_ID — vault containing Amplitude MCP credentials + */ + +import Anthropic from "@anthropic-ai/sdk"; + +const AGENT_ID = process.env.DATA_ANALYST_AGENT_ID; +const AGENT_VERSION = process.env.DATA_ANALYST_AGENT_VERSION + ? Number(process.env.DATA_ANALYST_AGENT_VERSION) + : undefined; +const ENVIRONMENT_ID = process.env.DATA_ANALYST_ENVIRONMENT_ID; +const VAULT_ID = process.env.DATA_ANALYST_VAULT_ID; + +if (!AGENT_ID || !ENVIRONMENT_ID) { + console.error( + "Missing required env vars: DATA_ANALYST_AGENT_ID, DATA_ANALYST_ENVIRONMENT_ID\n" + + "Run agents/data-analyst-setup.ts first.", + ); + process.exit(1); +} + +const userMessage = process.argv.slice(2).join(" ") || "Hello! What can you help me analyse?"; + +async function runSession() { + const client = new Anthropic(); + + // 1. Create session referencing the pre-created agent + console.log(`Starting session with agent ${AGENT_ID}...`); + const session = await client.beta.sessions.create({ + agent: AGENT_VERSION + ? { type: "agent", id: AGENT_ID, version: AGENT_VERSION } + : AGENT_ID, + environment_id: ENVIRONMENT_ID, + title: `Data analysis — ${new Date().toISOString()}`, + ...(VAULT_ID ? { vault_ids: [VAULT_ID] } : {}), + }); + console.log(`Session ${session.id} created (status: ${session.status})\n`); + + // 2. Open stream BEFORE sending the message (stream-first ordering) + const stream = await client.beta.sessions.stream(session.id); + + await client.beta.sessions.events.send(session.id, { + events: [ + { + type: "user.message", + content: [{ type: "text", text: userMessage }], + }, + ], + }); + + // 3. Process the event stream + const customToolCalls: Array<{ id: string; tool_name: string; input: unknown }> = []; + + for await (const event of stream) { + switch (event.type) { + case "agent.message": + for (const block of event.content) { + if (block.type === "text") { + process.stdout.write(block.text); + } + } + break; + + case "agent.custom_tool_use": + // Custom tool calls — send results back to continue the session + customToolCalls.push({ id: event.id, tool_name: event.tool_name, input: event.input }); + break; + + case "session.status_idle": + if (event.stop_reason?.type === "requires_action") { + // Session is waiting for custom tool results — handle them + if (customToolCalls.length > 0) { + const results = customToolCalls.splice(0).map((call) => ({ + type: "user.custom_tool_result" as const, + custom_tool_use_id: call.id, + content: [ + { + type: "text" as const, + text: `Tool "${call.tool_name}" is not implemented in this runner. Input was: ${JSON.stringify(call.input)}`, + }, + ], + })); + await client.beta.sessions.events.send(session.id, { events: results }); + } + } else { + // Terminal idle — end_turn or retries_exhausted + console.log("\n\n[Session complete]"); + } + break; + + case "session.status_terminated": + console.log("\n[Session terminated]"); + break; + + case "session.error": + console.error("\n[Session error]", event); + break; + + case "span.model_request_end": + // Log token usage for cost tracking + if (event.model_usage) { + const u = event.model_usage; + console.error( + `\n[usage] input=${u.input_tokens} output=${u.output_tokens}` + + ` cache_read=${u.cache_read_input_tokens} cache_write=${u.cache_creation_input_tokens}`, + ); + } + break; + } + } + + // 4. List any files the agent wrote to /mnt/session/outputs/ + try { + const files = await client.beta.files.list({ + scope_id: session.id, + betas: ["managed-agents-2026-04-01"], + } as Parameters[0]); + + if (files.data.length > 0) { + console.log("\n[Output files]"); + for (const f of files.data) { + console.log(` ${f.filename} (${f.size_bytes} bytes) — file_id: ${f.id}`); + } + } + } catch { + // scope_id filtering may not be available in all SDK versions + } +} + +runSession().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/agents/data-analyst-setup.ts b/agents/data-analyst-setup.ts new file mode 100644 index 00000000..b5101e93 --- /dev/null +++ b/agents/data-analyst-setup.ts @@ -0,0 +1,111 @@ +/** + * ONE-TIME SETUP — run once, save the printed IDs to your .env or config. + * + * Usage: + * npx ts-node agents/data-analyst-setup.ts + * # or: bun agents/data-analyst-setup.ts + * + * Required env vars: + * ANTHROPIC_API_KEY + * + * Optional (for Amplitude MCP auth): + * AMPLITUDE_ACCESS_TOKEN — OAuth access token for https://mcp.amplitude.com/mcp + * AMPLITUDE_REFRESH_TOKEN — OAuth refresh token (enables auto-refresh) + */ + +import Anthropic from "@anthropic-ai/sdk"; + +const SYSTEM_PROMPT = `You analyze data. Given a dataset (file path, URL, or query) and a question: + +1. Load the data and print its shape, column names, dtypes, and a small sample. Always look before you compute. +2. Clean obvious issues — nulls, duplicates, type mismatches — and note what you changed. +3. Answer the question with code. Prefer pandas/polars for tabular work, matplotlib/plotly for charts. Show intermediate results so your reasoning is checkable. +4. For product-analytics questions, query Amplitude directly — event funnels, retention cohorts, property breakdowns — and link the chart. +5. Save any charts or derived tables to /mnt/session/outputs/ and summarize findings in plain language, including caveats (sample size, missing data, correlation-vs-causation). + +Default to simple, readable analysis over clever one-liners. A clear bar chart usually beats a dense heatmap.`; + +const AMPLITUDE_MCP_URL = "https://mcp.amplitude.com/mcp"; + +async function setup() { + const client = new Anthropic(); + + // 1. Create environment + console.log("Creating environment..."); + const environment = await client.beta.environments.create({ + name: "data-analyst-env", + config: { + type: "cloud", + networking: { type: "unrestricted" }, + }, + }); + console.log(`✓ Environment created: ${environment.id}`); + + // 2. Optionally create a vault for Amplitude MCP credentials + let vaultId: string | undefined; + const accessToken = process.env.AMPLITUDE_ACCESS_TOKEN; + + if (accessToken) { + console.log("Creating vault for Amplitude credentials..."); + const vault = await client.beta.vaults.create({ name: "data-analyst-vault" }); + + await client.beta.vaults.credentials.create(vault.id, { + display_name: "Amplitude MCP", + auth: { + type: "mcp_oauth", + mcp_server_url: AMPLITUDE_MCP_URL, + access_token: accessToken, + ...(process.env.AMPLITUDE_REFRESH_TOKEN + ? { + refresh: { + refresh_token: process.env.AMPLITUDE_REFRESH_TOKEN, + // Update these to match your OAuth app registration + client_id: process.env.AMPLITUDE_CLIENT_ID ?? "", + token_endpoint: "https://api.amplitude.com/oauth2/token", + token_endpoint_auth: { type: "none" }, + }, + } + : {}), + }, + }); + + vaultId = vault.id; + console.log(`✓ Vault created: ${vault.id}`); + } else { + console.log( + "⚠ AMPLITUDE_ACCESS_TOKEN not set — skipping vault creation.\n" + + " Set it and re-run, or attach credentials manually before starting sessions.", + ); + } + + // 3. Create the agent + console.log("Creating Data analyst agent..."); + const agent = await client.beta.agents.create({ + name: "Data analyst", + model: "claude-sonnet-4-6", + system: SYSTEM_PROMPT, + mcp_servers: [ + { type: "url", name: "amplitude", url: AMPLITUDE_MCP_URL }, + ], + tools: [ + { type: "agent_toolset_20260401", default_config: { enabled: true } }, + { type: "mcp_toolset", mcp_server_name: "amplitude" }, + ], + }); + console.log(`✓ Agent created: ${agent.id} (version: ${agent.version})`); + + // 4. Print env vars to persist + console.log("\n─── Add these to your .env ───"); + console.log(`DATA_ANALYST_AGENT_ID=${agent.id}`); + console.log(`DATA_ANALYST_AGENT_VERSION=${agent.version}`); + console.log(`DATA_ANALYST_ENVIRONMENT_ID=${environment.id}`); + if (vaultId) { + console.log(`DATA_ANALYST_VAULT_ID=${vaultId}`); + } + console.log("──────────────────────────────"); +} + +setup().catch((err) => { + console.error(err); + process.exit(1); +});