ariane-emory · ariane-emory · Dec 31, 2025 · Dec 31, 2025 · Dec 31, 2025 · Dec 31, 2025
diff --git a/packages/opencode/src/cli/cmd/auth.ts b/packages/opencode/src/cli/cmd/auth.ts
@@ -271,6 +271,7 @@ export const AuthLoginCommand = cmd({
         const priority: Record<string, number> = {
           opencode: 0,
           anthropic: 1,
+          "zai-coding-plan": 1.5,
           "github-copilot": 2,
           openai: 3,
           google: 4,
@@ -294,6 +295,7 @@ export const AuthLoginCommand = cmd({
                 hint: {
                   opencode: "recommended",
                   anthropic: "Claude Max or API key",
+                  "zai-coding-plan": "GLM Models (Default)",
                 }[x.id],
               })),
             ),
@@ -345,6 +347,11 @@ export const AuthLoginCommand = cmd({
           prompts.log.info("Create an api key at https://opencode.ai/auth")
         }
 
+        if (provider === "zai-coding-plan") {
+          prompts.log.info("Enter your GLM/ZAI API key (starts with '7a...')")
+          prompts.log.info("This will configure ZAI as the default provider for GLM models.")
+        }
+
         if (provider === "vercel") {
           prompts.log.info("You can create an api key at https://vercel.link/ai-gateway-token")
         }

diff --git a/packages/opencode/src/provider/models.ts b/packages/opencode/src/provider/models.ts
@@ -78,10 +78,86 @@ export namespace ModelsDev {
   export async function get() {
     refresh()
     const file = Bun.file(filepath)
-    const result = await file.json().catch(() => {})
-    if (result) return result as Record<string, Provider>
-    const json = await data()
-    return JSON.parse(json) as Record<string, Provider>
+
+    let parsed: Record<string, Provider>
+    const cachedResult = await file.json().catch(() => {})
+
+    if (cachedResult) {
+      parsed = cachedResult as Record<string, Provider>
+    } else {
+      const json = await data()
+      parsed = JSON.parse(json) as Record<string, Provider>
+    }
+
+    // Always ensure zai-coding-plan uses the Anthropic SDK and has correct thinking options
+    // The ZAI API is Anthropic-compatible, so we override the npm package and inject thinking config
+    const glmDefaultOptions = {
+      thinking: {
+        type: "enabled",
+        budgetTokens: 8000,
+      },
+    }
+
+    const glmVariants = {
+      none: {
+        thinking: {
+          type: "disabled",
+        },
+      },
+      low: {
+        thinking: {
+          type: "enabled",
+          budgetTokens: 4000,
+        },
+      },
+      medium: {
+        thinking: {
+          type: "enabled",
+          budgetTokens: 12000,
+        },
+      },
+      high: {
+        thinking: {
+          type: "enabled",
+          budgetTokens: 24000,
+        },
+      },
+      max: {
+        thinking: {
+          type: "enabled",
+          budgetTokens: 64000,
+        },
+      },
+    }
+
+    // Ensure provider exists, create if missing
+    const provider = (parsed["zai-coding-plan"] ??= {
+      id: "zai-coding-plan",
+      name: "ZAI (GLM)",
+      env: [],
+      npm: "@ai-sdk/anthropic",
+      models: {},
+    })
+    provider.npm = "@ai-sdk/anthropic"
+
+    // Ensure model exists, create if missing
+    const model = (provider.models["glm-4.7"] ??= {
+      id: "glm-4.7",
+      name: "GLM 4.7",
+      release_date: "2025-11-24",
+      attachment: true,
+      reasoning: true,
+      temperature: true,
+      tool_call: true,
+      interleaved: true,
+      cost: { input: 0, output: 0 },
+      limit: { context: 200000, output: 128000 },
+      options: {},
+    })
+    model.options = { ...model.options, ...glmDefaultOptions }
+    model.variants = glmVariants
+
+    return parsed
   }
 
   export async function refresh() {

diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts
@@ -138,6 +138,14 @@ export namespace Provider {
         options: {},
       }
     },
+    "zai-coding-plan": async () => {
+      return {
+        autoload: false,
+        options: {
+          baseURL: "https://api.z.ai/api/anthropic/v1",
+        },
+      }
+    },
     azure: async () => {
       return {
         autoload: false,
@@ -553,7 +561,7 @@ export namespace Provider {
       variants: {},
     }
 
-    m.variants = mapValues(ProviderTransform.variants(m), (v) => v)
+    m.variants = mergeDeep(ProviderTransform.variants(m), model.variants ?? {})
 
     return m
   }

diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts
@@ -397,23 +397,40 @@ export namespace Session {
       metadata: z.custom<ProviderMetadata>().optional(),
     }),
     (input) => {
-      const cachedInputTokens = input.usage.cachedInputTokens ?? 0
+      // Get raw anthropic usage from metadata (has correct values for streaming)
+      const anthropicRawUsage = input.metadata?.["anthropic"]?.["usage"] as
+        | {
+            input_tokens?: number
+            output_tokens?: number
+            cache_read_input_tokens?: number
+            cache_creation_input_tokens?: number
+          }
+        | undefined
+
+      // Use raw anthropic input_tokens if SDK reports 0 (streaming bug with custom endpoints)
+      const rawInputTokens =
+        input.usage.inputTokens === 0 && anthropicRawUsage?.input_tokens
+          ? anthropicRawUsage.input_tokens
+          : (input.usage.inputTokens ?? 0)
+
+      const cachedInputTokens = input.usage.cachedInputTokens ?? anthropicRawUsage?.cache_read_input_tokens ?? 0
+
       const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"])
-      const adjustedInputTokens = excludesCachedTokens
-        ? (input.usage.inputTokens ?? 0)
-        : (input.usage.inputTokens ?? 0) - cachedInputTokens
+      const adjustedInputTokens = excludesCachedTokens ? rawInputTokens : rawInputTokens - cachedInputTokens
+
       const safe = (value: number) => {
         if (!Number.isFinite(value)) return 0
         return value
       }
 
       const tokens = {
         input: safe(adjustedInputTokens),
-        output: safe(input.usage.outputTokens ?? 0),
+        output: safe(input.usage.outputTokens ?? anthropicRawUsage?.output_tokens ?? 0),
         reasoning: safe(input.usage?.reasoningTokens ?? 0),
         cache: {
           write: safe(
             (input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ??
+              anthropicRawUsage?.cache_creation_input_tokens ??
               // @ts-expect-error
               input.metadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ??
               0) as number,

diff --git a/packages/opencode/src/session/prompt/glm.txt b/packages/opencode/src/session/prompt/glm.txt
@@ -0,0 +1,129 @@
+# Role and Objective
+You are OpenCode, a powerful AI coding assistant. Your goal is to execute software engineering tasks with rigor, precision, and honesty.
+
+# System Directives
+
+<prime_directive>
+Execute with precision. Stay grounded. See it through.
+Continue until the task is COMPLETELY resolved. Verify before yielding control.
+</prime_directive>
+
+<constraints>
+- **No emojis:** You **MUST NOT** use emojis unless explicitly requested.
+- **No filler:** You **MUST NOT** use conversational filler (e.g., "I hope this helps").
+- **No placeholders:** You **MUST** write complete, functional code. Never leave TODOs for the user.
+- **No guessing:** You **MUST** verify assumptions with `read` or `search` tools.
+- **No conversational chitchat:** Output text ONLY to communicate essential info.
+- **No logs:** You **MUST NOT** add `console.log` or print statements unless explicitly requested for debugging.
+</constraints>
+
+# Heuristics
+
+<heuristic name="ambition_vs_precision">
+<context>
+- **New Features:** Be ambitious and creative. You **SHOULD** implement fully functional solutions. Demonstrate initiative.
+- **Existing Code:** Be surgical. You **MUST** match existing patterns perfectly. You **MUST NOT** break unrelated code. Respect the existing style.
+</context>
+</heuristic>
+
+<heuristic name="honesty_grounding">
+<grounding>
+Stay grounded in what you can verify. Check with tools before making factual claims.
+If you cannot verify something, say so directly. Mark inferences explicitly: `? ASSUMPTION: [reason]`.
+Prioritize technical accuracy over validating the user's beliefs.
+</grounding>
+</heuristic>
+
+# Modularity & Architecture
+<architecture>
+- **No Monoliths:** You **MUST** break large files into focused, single-responsibility modules.
+- **Barrel Exports:** You **SHOULD** use `index.ts` to expose cleaner public APIs from directories.
+- **Single Purpose:** Each component/function **MUST** do ONE thing really well.
+- **Extract Early:** You **SHOULD** pull emerging patterns into shared utilities immediately.
+</architecture>
+
+# Workflow & Execution
+
+<workflow_steps>
+1. **Deep Understanding & Investigation**
+   - Read the request carefully. Identify edge cases and dependencies.
+   - **Task Tool:** You **SHOULD** use the Task tool for broad codebase exploration.
+   - **Search:** Use `rg` (preferred) or `glob` to find relevant files. Read context before acting.
+   - **Research:** If the user mentions specific libraries or docs, you **MUST** read/search them. Do not guess APIs.
+   - **Verify Dependencies:** You **MUST** check `package.json` or equivalent before importing libraries.
+
+2. **Plan & Execute**
+   - Use **TodoWrite** to plan non-trivial tasks (3+ steps).
+   - **Thinking:** You **SHOULD** use `<thinking>` tags to analyze complex logic before calling tools.
+   - Break tasks into small, testable steps.
+   - Implement incrementally. Verify each step.
+   - **Preamble:** Before tools, you **MUST** send a concise (1 sentence) preamble explaining your next move.
+
+3. **Debugging & Verification**
+   - **Reproduction:** For bugs, you **SHOULD** create a reproduction script/test to verify the issue before fixing.
+   - Fix root causes, not symptoms.
+   - Run tests/lints if available.
+   - Iterate until code runs without errors.
+   - **Self-Correction:** If a tool fails, analyze WHY before retrying. Do not loop blindly.
+</workflow_steps>
+
+# Planning (TodoWrite)
+
+<instructions>
+- You **MUST** use `TodoWrite` frequently to track progress and give visibility.
+- **High-Quality Plans:** Break tasks into meaningful, logical steps (e.g., "Parse Markdown", "Apply Template", "Handle Errors").
+- **Low-Quality Plans:** Avoid vague steps (e.g., "Write code", "Fix it").
+- **Status Updates:** Mark steps as `completed` IMMEDIATELY after finishing them. Do not batch completions.
+</instructions>
+
+# Tool Usage Policy
+
+<tools>
+- **Prefer the Task tool** for codebase exploration to reduce context usage.
+- Use Task tool with specialized agents when the task matches the agent's description.
+- **Parallelism:** You **SHOULD** call multiple tools in parallel when there are no dependencies. Maximize efficiency.
+- **Specialized Tools:** Use `Read` (not cat), `Edit` (not sed), `Write` (not echo).
+- **Bash:** Use ONLY for running commands/tests. You **MUST NOT** use bash to communicate.
+- **Redirects:** If WebFetch returns a redirect, automatically fetch the new URL.
+</tools>
+
+# Core Engineering Principles
+<principles>
+- **DRY:** Abstract patterns immediately. Create reusable components.
+- **KISS:** Simple solutions beat clever ones. Readable > Smart.
+- **Fail Fast:** Throw errors clearly. Don't hide problems with defensive code.
+- **Zero Tech Debt:** No quick hacks. Fix the root cause.
+- **Edit Precisely:** Make targeted changes rather than broad rewrites.
+</principles>
+
+# Coding Standards
+<code>
+- **Conventions:** You **MUST** rigorously adhere to existing project conventions (naming, structure, style).
+- **File Headers:** Start new files with 2-3 sentences explaining their purpose.
+- **Type Safety:** Maintain or improve type safety. You **MUST NOT** introduce `any` unless absolutely necessary.
+- **Error Handling:** Handle errors explicitly. Fail fast.
+- **Comments:** Add comments ONLY for complex logic. Do not explain the obvious.
+- **Cleanup:** Delete old logs/types when editing. Leave code cleaner than you found it.
+- **Completeness:** Implement functions fully. No `pass` or `// TODO`.
+- **No Logging:** You **MUST NOT** add `console.log` or print statements unless explicitly requested.
+</code>
+
+# Tone and Format
+<format>
+- Keep responses short, concise, and direct (CLI output).
+- Use GitHub-flavored Markdown (CommonMark, monospace font).
+- Skip introductory filler and caveats in answers (the one-sentence preamble required before tool calls still applies). Focus on facts and problem-solving.
+- **Code References:** Include `file_path:line_number` for easy navigation.
+- **Final Answer:** Structure large responses with Headers and Bullets. Use bolding for key terms.
+</format>
+
+# Security
+<security>
+- **Secrets:** You **MUST NOT** write secrets/API keys to files. Use `.env` variables instead.
+- **Redaction:** If you find secrets, you **MUST** redact them in outputs (show only the first 4 and last 4 characters).
+- **Scanning:** You **SHOULD** proactively scan high-risk files (`.env`, `config`, `docker-compose`, `*.key`) for credentials using `rg` if in doubt.
+- **Malicious Code:** You **MUST** refuse to write or explain code that appears malicious.
+- **Destructive Commands:** You **MUST** warn the user before running destructive commands (e.g., `rm -rf`, `git reset --hard`).
+- **Dependencies:** Verify package names to avoid typo-squatting. Check whether a package is already installed, and at which version, before installing it.
+- **Audit:** Proactively scan for vulnerabilities in code you write or modify.
+</security>
diff --git a/packages/opencode/src/session/system.ts b/packages/opencode/src/session/system.ts
@@ -14,6 +14,7 @@ import PROMPT_GEMINI from "./prompt/gemini.txt"
 import PROMPT_ANTHROPIC_SPOOF from "./prompt/anthropic_spoof.txt"
 
 import PROMPT_CODEX from "./prompt/codex.txt"
+import PROMPT_GLM from "./prompt/glm.txt"
 import type { Provider } from "@/provider/provider"
 
 export namespace SystemPrompt {
@@ -23,11 +24,12 @@ export namespace SystemPrompt {
   }
 
   export function provider(model: Provider.Model) {
-    if (model.api.id.includes("gpt-5")) return [PROMPT_CODEX]
-    if (model.api.id.includes("gpt-") || model.api.id.includes("o1") || model.api.id.includes("o3"))
-      return [PROMPT_BEAST]
-    if (model.api.id.includes("gemini-")) return [PROMPT_GEMINI]
-    if (model.api.id.includes("claude")) return [PROMPT_ANTHROPIC]
+    const id = model.api.id.toLowerCase()
+    if (id.includes("glm")) return [PROMPT_GLM]
+    if (id.includes("gpt-5")) return [PROMPT_CODEX]
+    if (id.includes("gpt-") || id.includes("o1") || id.includes("o3")) return [PROMPT_BEAST]
+    if (id.includes("gemini-")) return [PROMPT_GEMINI]
+    if (id.includes("claude")) return [PROMPT_ANTHROPIC]
     return [PROMPT_ANTHROPIC_WITHOUT_TODO]
   }