codomyrmex/scripts/agents/agent_utils.py at main · docxology/codomyrmex · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""Agent Utilities for Real Conversational Integration.

Provides a unified factory to get a real LLM client (Claude or Ollama).
Strictly enforces real/live functionality - no mocks allowed.
"""

import json
import logging
import os
import time
import urllib.error
import urllib.request
from dataclasses import dataclass
from typing import Any


@dataclass
class AgentRequest:
    prompt: str
    metadata: dict[str, Any] = None


# Optional dependency: try to import the real Claude client. An ImportError is
# tolerated so this module still loads when the codomyrmex Claude package is
# not importable.
try:
    from codomyrmex.agents.claude.claude_client import ClaudeClient
except ImportError:
    # Sentinel: get_llm_client() checks the truthiness of ClaudeClient before
    # trying to use it, so None means "Claude backend unavailable".
    ClaudeClient = None

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class OllamaClient:
    """Client for local Ollama instance (REST API).

    Implements a robust interface compatible with ClaudeClient
    for use in ClaudeCodeEndpoint, using real LLM inference.

    Args:
        model: Name of the Ollama model to run.
        base_url: Root URL of the Ollama server; a trailing ``/`` is ignored.
        timeout: Optional socket timeout in seconds for the chat request.
            ``None`` (the default) leaves urllib's default behaviour in
            place, since generation can legitimately take a long time.
    """

    @dataclass
    class Response:
        """Result of one successful chat call.

        Failures are reported by raising, so an instance always represents
        a successful call.
        """

        content: str = ""
        tokens_used: int = 0
        execution_time: float = 0.0

        def is_success(self):
            """Return True; a failed call raises instead of returning."""
            return True

    def __init__(self, model="llama3", base_url="http://localhost:11434", timeout=None):
        self.model = model
        # Normalise so f"{base_url}/api/..." never produces a double slash.
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.session_manager = None  # dummy for interface compatibility

    def create_session(self, session_id):
        """Return None: this client keeps no session state.

        Ollama manages context via /api/chat when messages are sent; this
        simple client relies on prompt context or stateless calls.
        """
        return None

    @staticmethod
    def _build_messages(prompt):
        """Turn a raw prompt into the ``messages`` list for /api/chat.

        Naive heuristic for demo scripts: if the prompt contains
        ``System:``, the rest of that line becomes the system message and
        everything else (text before the marker plus the lines after it)
        becomes the user message. Without the marker the whole prompt is a
        single user message.
        """
        before, marker, after = prompt.partition("System:")
        if not marker:
            return [{"role": "user", "content": prompt}]

        system_text, _, remainder = after.partition("\n")
        # Keep any text that preceded the marker instead of dropping it.
        user_parts = [part for part in (before.strip(), remainder.strip()) if part]
        return [
            {"role": "system", "content": system_text.strip()},
            {"role": "user", "content": "\n".join(user_parts) or "Proceed."},
        ]

    def _log_available_models(self):
        """Best-effort diagnostic: log the models the server reports.

        Never raises; a failure to list models is only logged at debug level
        so it cannot mask the original error.
        """
        try:
            with urllib.request.urlopen(
                f"{self.base_url}/api/tags", timeout=1.0
            ) as resp:
                if resp.status == 200:
                    tags = json.loads(resp.read().decode("utf-8"))
                    models = [m.get("name") for m in tags.get("models", [])]
                    logger.warning("Available Ollama models: %s", models)
        except Exception as debug_err:
            logger.debug("Could not list Ollama models for debug: %s", debug_err)

    def execute_with_session(self, request, session=None, session_id=None):
        """Execute request using Ollama /api/chat for real conversation.

        Args:
            request: Object with a ``prompt`` string attribute.
            session: Accepted for interface compatibility; unused.
            session_id: Accepted for interface compatibility; unused.

        Returns:
            A ``Response`` with ``content`` (assistant text), ``tokens_used``
            (prompt + completion token counts reported by Ollama, 0 when the
            server omits them) and ``execution_time`` (seconds).

        Raises:
            RuntimeError: If the request fails for any reason (connection,
                HTTP status, malformed reply). The original error is chained.
        """
        # Ideally we would pull history from session, but for now we wrap the prompt
        messages = self._build_messages(request.prompt)
        payload = {"model": self.model, "messages": messages, "stream": False}
        # Only pass a timeout when one was configured so urllib's own default
        # (including socket.setdefaulttimeout) keeps applying otherwise.
        open_kwargs = {} if self.timeout is None else {"timeout": self.timeout}

        start_time = time.monotonic()
        try:
            req = urllib.request.Request(
                f"{self.base_url}/api/chat",
                data=json.dumps(payload).encode("utf-8"),
                headers={"Content-Type": "application/json"},
            )
            with urllib.request.urlopen(req, **open_kwargs) as response:
                if response.status != 200:
                    raise RuntimeError(f"Ollama returned {response.status}")
                data = json.loads(response.read().decode("utf-8"))
            # /api/chat returns 'message': {'content': ...}
            msg = data.get("message") or {}
            content = msg.get("content", "")
            # Token counters are part of the non-streaming reply; either may
            # be absent, so treat missing values as 0.
            tokens_used = (data.get("prompt_eval_count") or 0) + (
                data.get("eval_count") or 0
            )
        except Exception as e:
            # Propagate error with context; listing models helps debugging
            # the common "model not pulled" failure.
            self._log_available_models()
            raise RuntimeError(f"Real Ollama Connection Failed: {e}") from e

        elapsed = time.monotonic() - start_time
        return self.Response(
            content=content, tokens_used=tokens_used, execution_time=elapsed
        )


def get_llm_client(identity="agent", ollama_url="http://localhost:11434"):
    """Factory to get the best available REAL LLM client.

    Priority:
    1. ClaudeClient (if importable and ANTHROPIC_API_KEY is set)
    2. OllamaClient (if the server at ``ollama_url`` answers /api/tags)

    Args:
        identity: Label used as a prefix in status and error messages.
        ollama_url: Root URL of the Ollama server to probe and use; a
            trailing ``/`` is ignored. Defaults to the local instance.

    Returns:
        A ``ClaudeClient`` or an ``OllamaClient``. The Ollama model comes
        from the ``OLLAMA_MODEL`` environment variable, falling back to
        ``codellama:latest``.

    Raises:
        RuntimeError: If no real client is available.
    """
    base_url = ollama_url.rstrip("/")

    # 1. Check Claude
    if ClaudeClient and os.environ.get("ANTHROPIC_API_KEY"):
        print(f"[{identity}] Using real ClaudeClient (API Key found)")
        return ClaudeClient()

    # 2. Check Ollama with a quick health check; any failure simply means
    # "not reachable" and falls through to the final error.
    try:
        with urllib.request.urlopen(f"{base_url}/api/tags", timeout=1.0) as resp:
            ollama_reachable = resp.status == 200
    except Exception as e:
        logger.debug("Ollama not reachable: %s", e)
        ollama_reachable = False

    if ollama_reachable:
        # Use configured model or default
        model = os.environ.get("OLLAMA_MODEL", "codellama:latest")
        print(
            f"[{identity}] Using real OllamaClient (Localhost reachable, model={model})"
        )
        return OllamaClient(model=model, base_url=base_url)

    raise RuntimeError(
        f"[{identity}] CRITICAL: No Real LLM Available.\n"
        "Please set ANTHROPIC_API_KEY for Claude,\n"
        f"OR ensure Ollama is running at {base_url}.\n"
        "Mocks are strictly forbidden."
    )