From d177b8339f3421c8cec9df99076f113b19602f91 Mon Sep 17 00:00:00 2001 From: ColonistOne Date: Tue, 7 Apr 2026 18:57:22 +0100 Subject: [PATCH] Validate LLM responses and warn on unexpected formats The agent now logs warnings when the LLM ignores the expected response format instead of silently falling through: - Vote parsing: warns when response contains neither UPVOTE, DOWNVOTE, nor SKIP (LLM ignored the instruction) - Comment extraction: debug log for freeform format (no COMMENT: prefix), warning for ambiguous short responses - Reply to comments: rejects replies under 10 chars with warning (prevents posting "ok" or "Thanks!" as a reply) Co-Authored-By: Claude Opus 4.6 (1M context) --- colony_agent/agent.py | 37 ++++++++++++++++++++++++++++++++++++- tests/test_agent.py | 28 +++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/colony_agent/agent.py b/colony_agent/agent.py index bf32199..377e8b1 100644 --- a/colony_agent/agent.py +++ b/colony_agent/agent.py @@ -336,6 +336,12 @@ def _browse_and_engage(self) -> None: vote_value = 1 elif "DOWNVOTE" in response_upper: vote_value = -1 + elif "SKIP" not in response_upper: + log.warning( + "LLM response missing vote keyword (UPVOTE/DOWNVOTE/SKIP) " + "for '%s': %s", + title[:40], response[:100], + ) if vote_value != 0: direction = "upvote" if vote_value == 1 else "downvote" @@ -414,6 +420,14 @@ def _check_replies_to_own_post(self, post: dict) -> None: self.state.mark_replied_to_comment(comment_id) continue + if len(reply.strip()) < 10: + log.warning( + "LLM reply to %s too short to post (%d chars): '%s'", + c_author, len(reply.strip()), reply.strip(), + ) + self.state.mark_replied_to_comment(comment_id) + continue + if self.dry_run: self._dry_run_actions.append(("reply", f"{c_author} on '{title[:40]}'", reply[:200])) continue @@ -483,15 +497,36 @@ def _extract_comment(self, response: str) -> str: comment = stripped[8:].strip().lstrip("-").strip() if comment and comment.upper() != "SKIP": return comment + return "" # Explicit SKIP after COMMENT: + + # Check for explicit SKIP + upper = response.upper() + if "SKIP" in upper: + return "" # If no COMMENT: prefix, check if the whole response looks like a comment # (not just VOTE/SKIP keywords) clean = response.strip() - skip_words = {"UPVOTE", "DOWNVOTE", "SKIP", "VOTE:"} + skip_words = {"UPVOTE", "DOWNVOTE", "VOTE:"} if any(clean.upper().startswith(w) for w in skip_words): + # Response only has vote keywords, no comment content + log.debug( + "LLM response has no comment section: %s", response[:100], + ) return "" if len(clean) > 20: # Likely a real comment, not just a keyword + log.debug( + "LLM response used freeform format (no COMMENT: prefix): %s", + clean[:60], + ) return clean + + # Short, ambiguous response — not a valid comment + if clean: + log.warning( + "LLM response too short/ambiguous to use as comment: '%s'", + clean, + ) return "" # ── Memory management ──────────────────────────────────────────── diff --git a/tests/test_agent.py b/tests/test_agent.py index d89f7b2..075b8a8 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -240,6 +240,23 @@ def test_skips_when_llm_says_skip(self, mock_chat, agent): agent.client.vote_post.assert_not_called() agent.client.create_comment.assert_not_called() + @patch("colony_agent.agent.chat", return_value="I think this post is really interesting and thought-provoking.") + def test_warns_on_missing_vote_keyword(self, mock_chat, agent, caplog): + agent.client.get_me.return_value = {"username": "testbot"} + agent.client.get_posts.return_value = { + "posts": [ + { + "id": "p1", "title": "Some post", "body": "Content.", + "author": {"username": "other"}, + } + ] + } + import logging + with caplog.at_level(logging.WARNING, logger="colony-agent"): + agent.heartbeat() + assert any("missing vote keyword" in r.message for r in caplog.records) + agent.client.vote_post.assert_not_called() + @patch("colony_agent.agent.chat", return_value="") def test_no_action_when_llm_fails(self, mock_chat, agent): agent.client.get_me.return_value = {"username": "testbot"} @@ -581,7 +598,7 @@ def test_skip_reply_still_marks_as_handled(self, mock_chat, agent): agent.client.create_comment.assert_not_called() assert agent.state.has_replied_to_comment("c1") - @patch("colony_agent.agent.chat", return_value="Thanks!") + @patch("colony_agent.agent.chat", return_value="Thanks for the thoughtful feedback, really appreciate it!") def test_respects_comment_limit(self, mock_chat, tmp_path): config = make_config( tmp_path, @@ -917,3 +934,12 @@ def test_plain_comment_text(self, agent): def test_short_text_ignored(self, agent): assert agent._extract_comment("ok") == "" + + def test_vote_only_response_no_comment(self, agent): + assert agent._extract_comment("VOTE: UPVOTE") == "" + + def test_skip_in_middle_of_text(self, agent): + assert agent._extract_comment("I'll SKIP this one, nothing to add.") == "" + + def test_comment_skip_explicit(self, agent): + assert agent._extract_comment("VOTE: UPVOTE\nCOMMENT: SKIP") == ""