initializ · initializ-mk · May 19, 2026 · May 19, 2026
diff --git a/docs/core-concepts/channels.md b/docs/core-concepts/channels.md
@@ -199,11 +199,23 @@ The Slack adapter deduplicates events by envelope ID to prevent processing the s
 
 When an agent response exceeds 4096 characters (common with research reports), channel adapters automatically split it into a **summary message** and a **file attachment**:
 
-1. A brief summary (first paragraph, up to 600 characters) is sent as a regular message
+1. A brief summary is sent as a regular inline message
 2. The full report is uploaded as a downloadable Markdown file (`research-report.md`)
 
 This works on both Slack (via `files.getUploadURLExternal`) and Telegram (via `sendDocument`). If file upload fails, adapters fall back to chunked messages. Markdown is converted to platform-native formatting (Slack mrkdwn or Telegram HTML).
 
+### Summary Source
+
+The runtime decides what the inline summary contains:
+
+| Condition | Inline summary source |
+|---|---|
+| Final LLM response > 4096 chars | LLM-generated summary — one extra `Chat()` call asking the model to summarise its own response in 2-4 sentences (or 3-5 bullets when the response is structured). Returned to channel adapters as `a2a.Message.Summary` |
+| LLM response ≤ 4096 chars but a tool attached a large file part | The LLM's response text itself — it is already a brief summary of the file content, so no extra summariser call is made. Adapters may still head-truncate this text when it exceeds 600 characters |
+| LLM response ≤ 4096 chars, no file part | The full response is sent inline as chunked messages — no attachment, no summary |
+
+If the summariser call fails or returns an empty string, `a2a.Message.Summary` is left empty and channel adapters fall back to head-truncating the response body at the first paragraph boundary (≤ 600 chars) or with `truncateAtSentence(text, 500)`. This fallback ensures the channel always delivers *something*, even when the LLM is unreachable.
+
 Additionally, the runtime tracks large tool outputs (>8000 characters) and attaches them as file parts in the A2A response. This ensures channel adapters receive the complete, untruncated tool output even when the LLM's text summary is truncated by output token limits. JSON tool outputs (e.g. Tavily Research/Search results) are automatically unwrapped into readable markdown before delivery.
 
 ## Container Deployment

diff --git a/forge-core/a2a/types.go b/forge-core/a2a/types.go
@@ -39,9 +39,15 @@ const (
 )
 
 // Message is a single conversational turn in the A2A protocol.
+//
+// Summary, when set, is a short LLM-generated synopsis of the agent's full
+// response. Channel adapters prefer it over head-truncating the verbose body
+// when an inline-friendly message is needed. It is empty for short responses
+// and whenever the best-effort summariser call fails or returns nothing.
 type Message struct {
-	Role  MessageRole `json:"role"`
-	Parts []Part      `json:"parts"`
+	Role    MessageRole `json:"role"`
+	Parts   []Part      `json:"parts"`
+	Summary string      `json:"summary,omitempty"`
 }
 
 // PartKind discriminates the content type of a Part.

diff --git a/forge-core/runtime/loop.go b/forge-core/runtime/loop.go
@@ -397,7 +397,7 @@ func (e *LLMExecutor) Execute(ctx context.Context, task *a2a.Task, msg *a2a.Mess
 				resp.Message.Content = "I processed your request but wasn't able to produce a response. Please try again."
 			}
 			e.persistSession(task.ID, mem)
-			return llmMessageToA2A(resp.Message, largeToolOutputs...), nil
+			return e.finalizeResponse(ctx, resp.Message, largeToolOutputs...), nil
 		}
 
 		// Execute tool calls
@@ -406,7 +406,7 @@ func (e *LLMExecutor) Execute(ctx context.Context, task *a2a.Task, msg *a2a.Mess
 				resp.Message.Content = "I processed your request but wasn't able to produce a response. Please try again."
 			}
 			e.persistSession(task.ID, mem)
-			return llmMessageToA2A(resp.Message, largeToolOutputs...), nil
+			return e.finalizeResponse(ctx, resp.Message, largeToolOutputs...), nil
 		}
 
 		// The LLM made tool calls -- it's making progress. Allow
@@ -647,6 +647,24 @@ func llmMessageToA2A(msg llm.ChatMessage, extraParts ...a2a.Part) *a2a.Message {
 	}
 }
 
+// finalizeResponse converts the final LLM message (plus any extra parts, such
+// as large tool outputs) into the A2A message handed back to the caller.
+//
+// If the LLM text is longer than summaryInlineThreshold, channel adapters will
+// split it into an inline summary plus an attachment, so the LLM is asked for
+// a real summary, which is stored in Message.Summary. The summary is
+// best-effort: when none is produced Summary stays empty and channels keep
+// their head-truncation behavior. Short text is never summarised, even when a
+// file part is attached, because it already serves as the file's summary.
+func (e *LLMExecutor) finalizeResponse(ctx context.Context, msg llm.ChatMessage, extraParts ...a2a.Part) *a2a.Message {
+	result := llmMessageToA2A(msg, extraParts...)
+	if len(msg.Content) <= summaryInlineThreshold {
+		return result
+	}
+	result.Summary = generateSummary(ctx, e.client, msg.Content)
+	return result
+}
+
 // isWriteActionTool returns true for tools that modify state (edit, write,
 // commit, push, create PR) as opposed to read-only tools (read, grep, glob,
 // directory_tree, clone, status).

diff --git a/forge-core/runtime/summary.go b/forge-core/runtime/summary.go
@@ -0,0 +1,38 @@
+package runtime
+
+import (
+	"context"
+	"strings"
+
+	"github.com/initializ/forge/forge-core/llm"
+)
+
+// summaryInlineThreshold is the response length, measured with len() (bytes,
+// not runes), above which the runtime asks the LLM for a short summary that
+// channel adapters can use instead of head-truncating the verbose response.
+const summaryInlineThreshold = 4096
+
+// summaryPrompt is the system message for the one-shot summariser call made
+// by generateSummary. The verbose response is supplied as the user message and
+// the model is instructed to reply with the summary text only.
+const summaryPrompt = `You are summarising an agent's response so a chat channel (Slack, Telegram, etc.) can show a brief, useful preview while the full text is attached as a file. Write 2-4 sentences (or 3-5 bullets if the response is structured) that convey the answer or conclusion — not the introduction. Do not say "this report" or "the response". Do not add any preamble. Plain markdown only.`
+
+// generateSummary asks the LLM for a short summary of the given response text
+// and returns it with surrounding whitespace trimmed. It is best-effort and
+// never fails the request: a nil client, blank input, a Chat error, or a nil
+// response all yield "" so callers can fall back to head-truncation.
+func generateSummary(ctx context.Context, client llm.Client, response string) string {
+	if client == nil || strings.TrimSpace(response) == "" {
+		return ""
+	}
+	resp, err := client.Chat(ctx, &llm.ChatRequest{
+		Messages: []llm.ChatMessage{
+			{Role: llm.RoleSystem, Content: summaryPrompt},
+			{Role: llm.RoleUser, Content: response},
+		},
+	})
+	if err != nil || resp == nil { // a (nil, nil) reply must not panic below
+		return ""
+	}
+	return strings.TrimSpace(resp.Message.Content)
+}
diff --git a/forge-core/runtime/summary_test.go b/forge-core/runtime/summary_test.go
@@ -0,0 +1,128 @@
+package runtime
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/initializ/forge/forge-core/a2a"
+	"github.com/initializ/forge/forge-core/llm"
+)
+
+// TestGenerateSummary_HappyPath checks that the summary comes back trimmed and
+// that the summariser request holds exactly two messages: a system message
+// first, then a message carrying the response text.
+func TestGenerateSummary_HappyPath(t *testing.T) {
+	const input = "long response text"
+	var seen *llm.ChatRequest
+	stub := &mockLLMClient{chatFunc: func(_ context.Context, req *llm.ChatRequest) (*llm.ChatResponse, error) {
+		seen = req
+		reply := llm.ChatMessage{Role: llm.RoleAssistant, Content: "  short summary here  "}
+		return &llm.ChatResponse{Message: reply}, nil
+	}}
+
+	if summary := generateSummary(context.Background(), stub, input); summary != "short summary here" {
+		t.Errorf("expected trimmed summary, got %q", summary)
+	}
+	if seen == nil || len(seen.Messages) != 2 {
+		t.Fatalf("expected 2 messages in summariser request, got %+v", seen)
+	}
+	if first := seen.Messages[0]; first.Role != llm.RoleSystem {
+		t.Errorf("expected system message first, got role %q", first.Role)
+	}
+	if second := seen.Messages[1]; second.Content != input {
+		t.Errorf("expected response as user message, got %q", second.Content)
+	}
+}
+
+func TestGenerateSummary_NilClient(t *testing.T) {
+	if summary := generateSummary(context.Background(), nil, "anything"); len(summary) != 0 {
+		t.Errorf("expected empty summary with nil client, got %q", summary)
+	}
+}
+
+// TestGenerateSummary_EmptyResponse checks that whitespace-only input returns
+// an empty summary without the LLM client ever being invoked.
+func TestGenerateSummary_EmptyResponse(t *testing.T) {
+	mustNotCall := func(context.Context, *llm.ChatRequest) (*llm.ChatResponse, error) {
+		t.Fatal("client should not be called for empty response")
+		return nil, nil
+	}
+	if summary := generateSummary(context.Background(), &mockLLMClient{chatFunc: mustNotCall}, "   \n\t  "); summary != "" {
+		t.Errorf("expected empty summary for whitespace input, got %q", summary)
+	}
+}
+
+// TestGenerateSummary_ClientError checks that a Chat error is swallowed and
+// reported as an empty summary, since summarisation is best-effort.
+func TestGenerateSummary_ClientError(t *testing.T) {
+	failing := &mockLLMClient{chatFunc: func(context.Context, *llm.ChatRequest) (*llm.ChatResponse, error) {
+		return nil, fmt.Errorf("simulated network failure")
+	}}
+	if summary := generateSummary(context.Background(), failing, "long response"); summary != "" {
+		t.Errorf("expected empty summary on client error (best-effort), got %q", summary)
+	}
+}
+
+func TestFinalizeResponse_SetsSummaryWhenLong(t *testing.T) {
+	summariserCalls := 0
+	client := &mockLLMClient{
+		chatFunc: func(ctx context.Context, req *llm.ChatRequest) (*llm.ChatResponse, error) {
+			summariserCalls++
+			return &llm.ChatResponse{
+				Message: llm.ChatMessage{Role: llm.RoleAssistant, Content: "generated summary"},
+			}, nil
+		},
+	}
+	exec := &LLMExecutor{client: client}
+
+	long := strings.Repeat("a", summaryInlineThreshold+1)
+	got := exec.finalizeResponse(context.Background(), llm.ChatMessage{Role: llm.RoleAssistant, Content: long})
+
+	if got.Summary != "generated summary" {
+		t.Errorf("expected Summary populated, got %q", got.Summary)
+	}
+	if summariserCalls != 1 {
+		t.Errorf("expected exactly 1 summariser call, got %d", summariserCalls)
+	}
+}
+
+// TestFinalizeResponse_SkipsSummaryWhenShort checks that a short body leaves
+// Summary empty and never reaches the summariser.
+func TestFinalizeResponse_SkipsSummaryWhenShort(t *testing.T) {
+	mustNotCall := func(context.Context, *llm.ChatRequest) (*llm.ChatResponse, error) {
+		t.Fatal("summariser should not be called for short responses")
+		return nil, nil
+	}
+	executor := &LLMExecutor{client: &mockLLMClient{chatFunc: mustNotCall}}
+	body := llm.ChatMessage{Role: llm.RoleAssistant, Content: "short"}
+
+	if out := executor.finalizeResponse(context.Background(), body); out.Summary != "" {
+		t.Errorf("expected empty Summary for short response, got %q", out.Summary)
+	}
+}
+
+// TestFinalizeResponse_NoSummaryForShortTextWithFile covers a short LLM body
+// plus a tool-attached file part: the text already summarises the file, so the
+// summariser must not run, Summary stays empty, and both parts are returned.
+func TestFinalizeResponse_NoSummaryForShortTextWithFile(t *testing.T) {
+	mustNotCall := func(context.Context, *llm.ChatRequest) (*llm.ChatResponse, error) {
+		t.Fatal("summariser should not be called when LLM body is short")
+		return nil, nil
+	}
+	executor := &LLMExecutor{client: &mockLLMClient{chatFunc: mustNotCall}}
+	attachment := a2a.Part{
+		Kind: a2a.PartKindFile,
+		File: &a2a.FileContent{Name: "report.md", Bytes: []byte("huge file content")},
+	}
+	body := llm.ChatMessage{Role: llm.RoleAssistant, Content: "Here is the report."}
+	out := executor.finalizeResponse(context.Background(), body, attachment)
+
+	if out.Summary != "" {
+		t.Errorf("expected empty Summary for short body, got %q", out.Summary)
+	}
+	if got := len(out.Parts); got != 2 {
+		t.Errorf("expected text + file parts, got %d parts", got)
+	}
+}
diff --git a/forge-plugins/channels/slack/slack.go b/forge-plugins/channels/slack/slack.go
@@ -559,10 +559,14 @@ func (p *Plugin) SendResponse(event *channels.ChannelEvent, response *a2a.Messag
 			return p.sendChunked(event, fileContent)
 		}
 
-		// File uploaded — send the LLM text as summary with "attached" note.
-		summary := text
-		if len(summary) > 600 {
-			summary, _ = markdown.SplitSummaryAndReport(summary)
+		// File uploaded — prefer the runtime-generated summary when available,
+		// otherwise fall back to head-truncating the LLM text.
+		summary := response.Summary
+		if summary == "" {
+			summary = text
+			if len(summary) > 600 {
+				summary, _ = markdown.SplitSummaryAndReport(summary)
+			}
 		}
 		summaryText := summary + "\n\n_Full report attached as file above._"
 		summaryMrkdwn := markdown.ToSlackMrkdwn(summaryText)
@@ -579,7 +583,11 @@ func (p *Plugin) SendResponse(event *channels.ChannelEvent, response *a2a.Messag
 
 	// No file parts — use text-based logic.
 	if len(text) > 4096 {
-		summary, report := markdown.SplitSummaryAndReport(text)
+		summary := response.Summary
+		report := text
+		if summary == "" {
+			summary, report = markdown.SplitSummaryAndReport(text)
+		}
 
 		threadTS := event.ThreadID
 		if threadTS == "" {

diff --git a/forge-plugins/channels/slack/slack_test.go b/forge-plugins/channels/slack/slack_test.go
@@ -737,6 +737,104 @@ func TestSendResponse_WithFilePart(t *testing.T) {
 	}
 }
 
+// TestSendResponse_PrefersMessageSummary verifies that when a2a.Message.Summary
+// is set, the Slack adapter posts it instead of head-truncating the body — so
+// a long (~8 KB) response with a one-sentence runtime summary delivers that
+// summary, not the opening text of the body, to the channel.
+func TestSendResponse_PrefersMessageSummary(t *testing.T) {
+	var lastPostText string // text of the most recent chat.postMessage call
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/files.getUploadURLExternal":
+			w.Write([]byte(`{"ok":true,"upload_url":"` + "http://" + r.Host + `/upload","file_id":"F456"}`)) //nolint:errcheck
+		case "/upload": // target of the upload_url handed out above
+			w.WriteHeader(http.StatusOK)
+		case "/files.completeUploadExternal":
+			w.Write([]byte(`{"ok":true}`)) //nolint:errcheck
+		case "/chat.postMessage":
+			body, _ := io.ReadAll(r.Body)
+			var payload map[string]any
+			json.Unmarshal(body, &payload) //nolint:errcheck
+			lastPostText, _ = payload["text"].(string)
+			w.Write([]byte(`{"ok":true}`)) //nolint:errcheck
+		default:
+			w.WriteHeader(http.StatusNotFound)
+		}
+	}))
+	defer srv.Close()
+
+	p := New()
+	p.botToken = "xoxb-test"
+	p.apiBase = srv.URL // presumably the base URL the plugin uses for Slack API calls
+
+	event := &channels.ChannelEvent{WorkspaceID: "C123", MessageID: "1234.5"}
+
+	longBody := strings.Repeat("This is the full untruncated body text. ", 200) // ~8 KB
+	msg := &a2a.Message{
+		Role:    a2a.MessageRoleAgent,
+		Parts:   []a2a.Part{a2a.NewTextPart(longBody)},
+		Summary: "TL;DR: the build succeeded after upgrading Go.",
+	}
+
+	if err := p.SendResponse(event, msg); err != nil {
+		t.Fatalf("SendResponse: %v", err)
+	}
+
+	if !strings.Contains(lastPostText, "TL;DR: the build succeeded") {
+		t.Errorf("expected runtime Summary in post, got %q", lastPostText)
+	}
+	// The opening of the body should NOT appear inline — only the Summary should.
+	if strings.Contains(lastPostText, "This is the full untruncated body text. This is the full") {
+		t.Errorf("body leaked into inline message; expected only Summary, got %q", lastPostText)
+	}
+}
+
+// TestSendResponse_FallsBackWhenNoSummary verifies that when Summary is empty,
+// the Slack adapter falls back to the existing head-truncation behavior (the
+// pre-Summary contract is preserved for callers that don't populate it).
+func TestSendResponse_FallsBackWhenNoSummary(t *testing.T) {
+	var lastPostText string // text of the most recent chat.postMessage call
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/files.getUploadURLExternal":
+			w.Write([]byte(`{"ok":true,"upload_url":"` + "http://" + r.Host + `/upload","file_id":"F456"}`)) //nolint:errcheck
+		case "/upload": // target of the upload_url handed out above
+			w.WriteHeader(http.StatusOK)
+		case "/files.completeUploadExternal":
+			w.Write([]byte(`{"ok":true}`)) //nolint:errcheck
+		case "/chat.postMessage":
+			body, _ := io.ReadAll(r.Body)
+			var payload map[string]any
+			json.Unmarshal(body, &payload) //nolint:errcheck
+			lastPostText, _ = payload["text"].(string)
+			w.Write([]byte(`{"ok":true}`)) //nolint:errcheck
+		default:
+			w.WriteHeader(http.StatusNotFound)
+		}
+	}))
+	defer srv.Close()
+
+	p := New()
+	p.botToken = "xoxb-test"
+	p.apiBase = srv.URL // presumably the base URL the plugin uses for Slack API calls
+
+	event := &channels.ChannelEvent{WorkspaceID: "C123", MessageID: "1234.5"}
+
+	longBody := "Lead sentence summary of the answer.\n\n" + strings.Repeat("Body filler. ", 500) // short lead paragraph + ~6.5 KB of filler
+	msg := &a2a.Message{
+		Role:  a2a.MessageRoleAgent,
+		Parts: []a2a.Part{a2a.NewTextPart(longBody)},
+	}
+
+	if err := p.SendResponse(event, msg); err != nil {
+		t.Fatalf("SendResponse: %v", err)
+	}
+
+	if !strings.Contains(lastPostText, "Lead sentence summary of the answer.") {
+		t.Errorf("expected head-truncated lead in post, got %q", lastPostText)
+	}
+}
+
 func TestDedupCache_FirstSeen(t *testing.T) {
 	p := New()
 	if p.isDuplicate("env-1") {