Merged
23 changes: 18 additions & 5 deletions manifest.json
@@ -1,6 +1,6 @@
{
"version": "2",
"updated_at": "2026-04-07T13:18:03Z",
"updated_at": "2026-04-07T13:25:51Z",
"skills": {
"databricks-apps": {
"version": "0.1.1",
@@ -15,6 +15,7 @@
"references/appkit/appkit-sdk.md",
"references/appkit/frontend.md",
"references/appkit/lakebase.md",
"references/appkit/model-serving.md",
"references/appkit/overview.md",
"references/appkit/proto-contracts.md",
"references/appkit/proto-first.md",
@@ -29,7 +30,7 @@
"version": "0.1.0",
"description": "Core Databricks skill for CLI, auth, and data exploration",
"experimental": false,
"updated_at": "2026-04-07T13:17:41Z",
"updated_at": "2026-04-07T13:17:46Z",
"files": [
"SKILL.md",
"agents/openai.yaml",
@@ -44,7 +45,7 @@
"version": "0.0.0",
"description": "Declarative Automation Bundles (DABs) for deploying and managing Databricks resources",
"experimental": false,
"updated_at": "2026-04-07T13:17:41Z",
"updated_at": "2026-04-07T13:17:46Z",
"files": [
"SKILL.md",
"agents/openai.yaml",
@@ -62,7 +63,7 @@
"version": "0.1.0",
"description": "Databricks Jobs orchestration and scheduling",
"experimental": false,
"updated_at": "2026-04-07T13:17:41Z",
"updated_at": "2026-04-07T13:17:46Z",
"files": [
"SKILL.md",
"agents/openai.yaml",
@@ -82,11 +83,23 @@
"assets/databricks.svg"
]
},
"databricks-model-serving": {
"version": "0.1.0",
"description": "Databricks Model Serving endpoint management",
"experimental": true,
"updated_at": "2026-04-07T13:25:43Z",
"files": [
"SKILL.md",
"agents/openai.yaml",
"assets/databricks.png",
"assets/databricks.svg"
]
},
"databricks-pipelines": {
"version": "0.1.0",
"description": "Databricks Pipelines (DLT) for ETL and streaming",
"experimental": false,
"updated_at": "2026-04-07T13:17:41Z",
"updated_at": "2026-04-07T13:17:46Z",
"files": [
"SKILL.md",
"agents/openai.yaml",
4 changes: 4 additions & 0 deletions scripts/skills.py
@@ -36,6 +36,10 @@
"description": "Declarative Automation Bundles (DABs) for deploying and managing Databricks resources",
"experimental": False,
},
"databricks-model-serving": {
"description": "Databricks Model Serving endpoint management",
"experimental": True,
},
"databricks-pipelines": {
"description": "Databricks Pipelines (DLT) for ETL and streaming",
"experimental": False,
3 changes: 2 additions & 1 deletion skills/databricks-apps/SKILL.md
@@ -23,6 +23,7 @@ Build apps that deploy to Databricks Apps platform.
| Using `useAnalyticsQuery` | [AppKit SDK](references/appkit/appkit-sdk.md) |
| Adding API endpoints | [tRPC Guide](references/appkit/trpc.md) |
| Using Lakebase (OLTP database) | [Lakebase Guide](references/appkit/lakebase.md) |
| Using Model Serving (ML inference) | [Model Serving Guide](references/appkit/model-serving.md) |
| Typed data contracts (proto-first design) | [Proto-First Guide](references/appkit/proto-first.md) and [Plugin Contracts](references/appkit/proto-contracts.md) |
| Platform rules (permissions, deployment, limits) | [Platform Guide](references/platform-guide.md) — READ for ALL apps including AppKit |
| Non-AppKit app (Streamlit, FastAPI, Flask, Gradio, Next.js, etc.) | [Other Frameworks](references/other-frameworks.md) |
@@ -72,7 +73,7 @@ Before writing any SQL, use the parent `databricks-core` skill for data exploration
- **Read analytics data → custom display (KPIs, cards)**: Use `useAnalyticsQuery` hook
- **Read analytics data → need computation before display**: Still use `useAnalyticsQuery`, transform client-side
- **Read/write persistent data (users, orders, CRUD state)**: Use Lakebase pool via tRPC — see [Lakebase Guide](references/appkit/lakebase.md)
- **Call ML model endpoint**: Use tRPC
- **Call ML model endpoint**: Use tRPC — see [Model Serving Guide](references/appkit/model-serving.md)
- **⚠️ NEVER use tRPC to run SELECT queries against the warehouse** — always use SQL files in `config/queries/`
- **⚠️ NEVER use `useAnalyticsQuery` for Lakebase data** — it queries the SQL warehouse only

104 changes: 104 additions & 0 deletions skills/databricks-apps/references/appkit/model-serving.md
@@ -0,0 +1,104 @@
# Model Serving: Calling ML Endpoints from Apps

Use Model Serving when your app needs **AI features** — chat, inference, embeddings, or predictions from a Databricks Model Serving endpoint. For analytics dashboards, use `config/queries/` instead. For persistent storage, use Lakebase.

## When to Use

| Pattern | Use Case | Data Source |
|---------|----------|-------------|
| Analytics | Read-only dashboards, charts, KPIs | SQL Warehouse |
| Lakebase | CRUD operations, persistent state, forms | PostgreSQL (Lakebase) |
| Model Serving | Chat, AI features, model inference | Serving Endpoint |
| Multiple | Dashboard with AI features or persistent state | Combine as needed |

## Scaffolding

Check if the `serving` plugin is available in the AppKit template:

```bash
databricks apps manifest --profile <PROFILE>
```

**If the manifest includes a `serving` plugin:**

```bash
databricks apps init --name <APP_NAME> --features serving \
--set "serving.serving-endpoint.name=<ENDPOINT_NAME>" \
--run none --profile <PROFILE>
```

**If no `serving` plugin** (add manually to an existing app):

Use the `databricks-model-serving` skill to create a serving endpoint first, then follow the resource declaration and tRPC patterns below.

## Resource Declaration

Add the serving endpoint resource to `databricks.yml`:

```yaml
resources:
apps:
my_app:
resources:
- name: my-model-endpoint
serving_endpoint:
name: <ENDPOINT_NAME>
permission: CAN_QUERY # auto-granted to SP on deploy
```

Add environment variable injection in `app.yaml`:

```yaml
env:
- name: SERVING_ENDPOINT
valueFrom: serving-endpoint
```

The injected value is the endpoint **name** (not a URL). Use it in server-side code to call the endpoint.
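Because the variable is only populated when both the `databricks.yml` resource and the `app.yaml` env mapping are in place, it can help to resolve it once and fail fast with a clear message. A minimal sketch; `resolveServingEndpoint` is a hypothetical helper, not part of the AppKit API:

```typescript
// Hypothetical helper (not part of the AppKit API): resolve the injected
// endpoint name, failing fast with a clear message if it is missing.
export function resolveServingEndpoint(
  env: Record<string, string | undefined> = process.env
): string {
  const name = env.SERVING_ENDPOINT;
  if (!name) {
    throw new Error(
      "SERVING_ENDPOINT is not set; check the env section of app.yaml"
    );
  }
  return name;
}
```

Calling this once at server startup surfaces a misconfigured deployment immediately, instead of as a failed model call at request time.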

## tRPC Pattern

Always use tRPC for model serving calls — do NOT call endpoints directly from the client.

```typescript
// server/server.ts (or server/trpc.ts)
import { initTRPC } from "@trpc/server";
import { getExecutionContext } from "@databricks/appkit";
import { z } from "zod";
import superjson from "superjson";

const t = initTRPC.create({ transformer: superjson });
const publicProcedure = t.procedure;

export const appRouter = t.router({
queryModel: publicProcedure
.input(z.object({ prompt: z.string() }))
.query(async ({ input: { prompt } }) => {
const { serviceDatabricksClient: client } = getExecutionContext();
const response = await client.servingEndpoints.query({
        name: process.env.SERVING_ENDPOINT!, // injected via app.yaml resource mapping
messages: [{ role: "user", content: prompt }],
});
return response;
}),
});
```

## Client-side Pattern

```typescript
// client/src/components/ChatComponent.tsx
import { trpc } from "@/lib/trpc";

// Call from an event handler or effect, not at module top level
async function askModel(userInput: string) {
  const result = await trpc.queryModel.query({ prompt: userInput });
  return result.choices?.[0]?.message?.content;
}
```

## Troubleshooting

| Error | Cause | Solution |
|-------|-------|---------|
| `PERMISSION_DENIED` on query | SP missing CAN_QUERY | Declare `serving_endpoint` resource in `databricks.yml` with `permission: CAN_QUERY` |
| `SERVING_ENDPOINT` env var empty | Missing env injection | Add `valueFrom: serving-endpoint` to `app.yaml` env section |
| 504 Gateway Timeout | Inference exceeds 120s proxy limit | Reduce `max_tokens` or use WebSockets — see [Platform Guide](../platform-guide.md) |
| `getExecutionContext` undefined | Called outside AppKit server context | Ensure call is inside a tRPC procedure on the server side |
2 changes: 2 additions & 0 deletions skills/databricks-apps/references/appkit/overview.md
@@ -11,6 +11,7 @@ Before scaffolding, decide which data pattern the app needs:
| **Analytics** (read-only) | Dashboards, charts, KPIs from warehouse | `--features analytics --set analytics.sql-warehouse.id=<ID>` |
| **Lakebase (OLTP)** (read/write) | CRUD forms, persistent state, user data | `--features lakebase --set lakebase.postgres.branch=<BRANCH> --set lakebase.postgres.database=<DB>` |
| **Both** | Dashboard + user data or preferences | `--features analytics,lakebase` with all required `--set` flags |
| **Model Serving** (ML inference) | Chat, AI features, model predictions | Add `serving_endpoint` resource to `databricks.yml` (or `--features serving` if available in manifest) |

See [Lakebase Guide](lakebase.md) for full Lakebase scaffolding and app-code patterns.

@@ -122,6 +123,7 @@ Do not guess paths — run without args first, then pick from the index.
| Add chart/table components | [Frontend](frontend.md) — component quick reference, anti-patterns |
| Add API mutation endpoints | [tRPC](trpc.md) — only if you need server-side logic |
| Use Lakebase for CRUD / persistent state | [Lakebase](lakebase.md) — createLakebasePool, tRPC patterns, schema init |
| Call ML model serving endpoints | [Model Serving](model-serving.md) — resource declaration, tRPC query pattern |

## Critical Rules

1 change: 1 addition & 0 deletions skills/databricks-core/SKILL.md
@@ -17,6 +17,7 @@ For specific products, use dedicated skills:
- **databricks-pipelines** - Lakeflow Spark Declarative Pipelines (batch and streaming data pipelines)
- **databricks-apps** - Full-stack TypeScript app development and deployment
- **databricks-lakebase** - Lakebase Postgres Autoscaling project management
- **databricks-model-serving** - Model Serving endpoint management and inference

## Prerequisites
