6-node pipeline: Input -> Thinker -> Output (voice) + UI (screen) in parallel - Output: text only (markdown, emoji). Never emits HTML or controls. - UI: dedicated node for labels, buttons, tables. Tracks workspace state. Replaces entire workspace on each update. Runs parallel with Output. - Input: strict one-sentence perception. No more hallucinating responses. - Thinker: controls removed from prompt, focuses on reasoning + tools. - Frontend: markdown rendered in chat (bold, italic, code blocks, lists). Label control type added. UI node meter in top bar. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
138 lines
5.4 KiB
Python
138 lines
5.4 KiB
Python
"""Thinker Node: S3 — control, reasoning, tool use."""
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
|
|
from .base import Node
|
|
from ..llm import llm_call
|
|
from ..process import ProcessManager
|
|
from ..types import Command, ThoughtResult
|
|
|
|
log = logging.getLogger("runtime")
|
|
|
|
|
|
class ThinkerNode(Node):
    """Thinker node: S3 — control, reasoning, tool use.

    Receives the Input node's one-sentence perception, asks the LLM to either
    answer directly or emit a ```python code block, executes that block via the
    ProcessManager, and returns a plain-text ThoughtResult (code stripped).
    """

    name = "thinker"
    model = "google/gemini-2.5-flash"
    max_context_tokens = 4000

    SYSTEM = """You are the Thinker node — the brain of this cognitive runtime.
You receive a perception of what the user said. Decide: answer directly or use a tool.

TOOLS — write a ```python code block and it WILL be executed. Use print() for output.
- For math, databases, file ops, any computation: write python. NEVER describe code — write it.
- For simple conversation: respond directly as text.

A separate UI node handles all visual controls (buttons, tables). Just focus on reasoning and content.

{memory_context}"""

    def __init__(self, send_hud, process_manager: ProcessManager | None = None):
        """Keep the HUD callback (handled by the base class) and the tool executor.

        process_manager may be None, in which case tool calls fall through to
        an "[unknown tool]" message instead of executing.
        """
        super().__init__(send_hud)
        self.pm = process_manager

    def _parse_tool_call(self, response: str) -> tuple[str, str] | None:
        """Parse tool calls. Supports TOOL: format and auto-detects code blocks.

        Returns (tool_name, code) or None when the response is plain text.
        SQL blocks are wrapped in a sqlite3 driver script so they run as python.
        """
        text = response.strip()

        # Legacy explicit format: "TOOL: name" followed by a fenced code block.
        if text.startswith("TOOL:"):
            lines = text.split("\n")
            tool_name = lines[0].replace("TOOL:", "").strip()
            code_lines = []
            in_code = False
            for line in lines[1:]:
                if line.strip().startswith("```") and not in_code:
                    in_code = True
                    continue
                elif line.strip().startswith("```") and in_code:
                    break
                elif in_code:
                    code_lines.append(line)
                elif line.strip().startswith("CODE:"):
                    continue
            return (tool_name, "\n".join(code_lines)) if code_lines else None

        # Auto-detect a fenced block; the language tag is optional.
        block_match = re.search(r'```(python|py|sql|sqlite|sh|bash|tool_code)?\s*\n(.*?)```', text, re.DOTALL)
        if block_match:
            lang = (block_match.group(1) or "").lower()
            code = block_match.group(2).strip()
            if code:
                # Only wrap raw SQL blocks — never re-wrap python that happens to contain SQL keywords
                if lang in ("sql", "sqlite"):
                    # {code!r} embeds the SQL as a proper Python string literal,
                    # so quotes inside the SQL cannot break the generated script.
                    wrapped = f'''import sqlite3
conn = sqlite3.connect("/tmp/cog_db.sqlite")
cursor = conn.cursor()
for stmt in {code!r}.split(";"):
    stmt = stmt.strip()
    if stmt:
        cursor.execute(stmt)
conn.commit()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
tables = cursor.fetchall()
for t in tables:
    cursor.execute(f"SELECT * FROM {{t[0]}}")
    rows = cursor.fetchall()
    cols = [d[0] for d in cursor.description]
    print(f"Table: {{t[0]}}")
    print(" | ".join(cols))
    for row in rows:
        print(" | ".join(str(c) for c in row))
conn.close()'''
                    return ("python", wrapped)
                return ("python", code)

        return None

    def _strip_code_blocks(self, response: str) -> str:
        """Remove fenced code blocks, return plain text.

        The language tag is optional, matching _parse_tool_call: any block the
        parser would execute must also be stripped from the spoken text.
        """
        text = re.sub(r'```(?:python|py|sql|sqlite|sh|bash|tool_code)?.*?```', '', response, flags=re.DOTALL)
        return text.strip()

    async def process(self, command: Command, history: list[dict], memory_context: str = "") -> ThoughtResult:
        """Reason about the perceived input; optionally run a tool, then answer.

        Flow: build context -> first LLM call -> if a tool call is detected,
        execute it and make a second LLM call to interpret the output.
        Returns a ThoughtResult whose response never contains code blocks.
        """
        await self.hud("thinking", detail="reasoning about response")

        messages = [
            {"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
        ]
        # Recent history only; trim_context enforces the token budget after.
        for msg in history[-12:]:
            messages.append(msg)
        messages.append({"role": "system", "content": f"Input perception: {command.instruction}"})
        messages = self.trim_context(messages)

        await self.hud("context", messages=messages, tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct)

        response = await llm_call(self.model, messages)
        if not response:
            response = "[no response from LLM]"
        log.info("[thinker] response: %s", response[:200])

        tool_call = self._parse_tool_call(response)
        if tool_call:
            tool_name, code = tool_call

            if self.pm and tool_name == "python":
                proc = await self.pm.execute(tool_name, code)
                tool_output = "\n".join(proc.output_lines)
            else:
                tool_output = f"[unknown tool: {tool_name}]"

            log.info("[thinker] tool output: %s", tool_output[:200])

            # Second call: interpret tool output
            messages.append({"role": "assistant", "content": response})
            messages.append({"role": "system", "content": f"Tool output:\n{tool_output}"})
            messages.append({"role": "user", "content": "Respond to the user based on the tool output. Be natural and concise."})
            messages = self.trim_context(messages)
            final = await llm_call(self.model, messages)
            # Guard the second call like the first: llm_call may return None/"",
            # and _strip_code_blocks would raise on None.
            if not final:
                final = "[no response from LLM]"

            clean_text = self._strip_code_blocks(final) or final
            await self.hud("decided", instruction=clean_text[:200])
            return ThoughtResult(response=clean_text, tool_used=tool_name,
                                 tool_output=tool_output)

        clean_text = self._strip_code_blocks(response) or response
        await self.hud("decided", instruction="direct response (no tools)")
        return ThoughtResult(response=clean_text)
|