v0.6.2: Thinker node with python tool execution (S3 Control)
- ThinkerNode: reasons about perception, decides tool use vs direct answer
- Python tool: subprocess execution with 10s timeout
- Auto-detects python code blocks in LLM output and executes them
- Tool call/result visible in trace + HUD
- Thinker meter in frontend (token budget: 4K)
- Flow: Input (perceive) -> Thinker (reason + tools) -> Output (speak)
- Tested: math (42*137=5754), SQLite (create+query), time, greetings

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5c7aece397
commit
8b69e6dd0d
171
agent.py
171
agent.py
@ -147,12 +147,20 @@ class Envelope:
|
|||||||
|
|
||||||
@dataclass
class Command:
    """Perception record emitted by the Input node.

    Carries what was heard (as a natural-language description) downstream
    so later nodes can decide how to respond.
    """
    instruction: str  # natural-language perception of the user's message
    source_text: str  # the user's original, unmodified message
    metadata: dict = field(default_factory=dict)  # optional extra context; fresh dict per instance
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ThoughtResult:
|
||||||
|
"""Thinker node's output — either a direct answer or tool results."""
|
||||||
|
response: str # what to tell the user (direct or post-tool)
|
||||||
|
tool_used: str = "" # which tool was called (empty if none)
|
||||||
|
tool_output: str = "" # raw tool output
|
||||||
|
|
||||||
|
|
||||||
# --- Base Node ---
|
# --- Base Node ---
|
||||||
|
|
||||||
def estimate_tokens(text: str) -> int:
|
def estimate_tokens(text: str) -> int:
|
||||||
@ -464,6 +472,141 @@ Be natural. Be concise. If the user asks you to do something, do it — don't de
|
|||||||
return full_response
|
return full_response
|
||||||
|
|
||||||
|
|
||||||
|
# --- Thinker Node (S3 — control, reasoning, tool use) ---

import os
import subprocess
import tempfile


class ThinkerNode(Node):
    """S3 control node: reasons about the Input node's perception and either
    answers directly or executes a python tool, then interprets its output.

    Flow: perception -> LLM -> (optional tool run -> second LLM pass) -> ThoughtResult.
    """

    name = "thinker"
    model = "google/gemini-2.0-flash-001"
    max_context_tokens = 4000  # token budget surfaced in the frontend thinker meter

    SYSTEM = """You are the Thinker node — the brain of this cognitive runtime.
You receive a perception of what the user said. Decide: answer directly or use a tool.

TOOL FORMAT — when you need to compute, query, or create something, respond with ONLY:
TOOL: python
CODE:
```
print("result here")
```

RULES:
- For math, databases, file ops, any computation: write a ```python code block. It WILL be executed.
- For simple conversation (greetings, opinions, knowledge): respond directly as text.
- Your python code runs in a real environment. Use print() for output.
- NEVER describe code — write it. It will run automatically.

{memory_context}"""

    def _parse_tool_call(self, response: str) -> tuple[str, str] | None:
        """Parse tool calls. Supports TOOL: format and auto-detects python code blocks.

        Returns (tool_name, code) or None when the response is plain text.
        """
        text = response.strip()

        # Explicit TOOL: format — tool name on line one, code inside the first fence.
        if text.startswith("TOOL:"):
            lines = text.split("\n")
            tool_name = lines[0].replace("TOOL:", "").strip()
            code_lines = []
            in_code = False
            for line in lines[1:]:
                if line.strip().startswith("```") and not in_code:
                    in_code = True
                    continue
                elif line.strip().startswith("```") and in_code:
                    break
                elif in_code:
                    code_lines.append(line)
                elif line.strip().startswith("CODE:"):
                    continue
            return (tool_name, "\n".join(code_lines)) if code_lines else None

        # Auto-detect: the model often ignores TOOL: and just emits a fenced
        # python block — extract and execute it anyway.
        if "```python" in text or "```py" in text:
            code_lines = []
            in_code = False
            for line in text.split("\n"):
                if ("```python" in line or "```py" in line) and not in_code:
                    in_code = True
                    continue
                elif line.strip() == "```" and in_code:
                    break
                elif in_code:
                    code_lines.append(line)
            # Fix: accept single-line blocks too (e.g. `print(42*137)`); the
            # old `len(code_lines) > 1` check silently skipped one-liners.
            if code_lines:
                return ("python", "\n".join(code_lines))

        return None

    async def process(self, command: Command, history: list[dict], memory_context: str = "") -> ThoughtResult:
        """Reason about a perception; run the python tool if the LLM asks for it.

        Args:
            command: perception produced by the Input node.
            history: recent chat messages (last 12 are included in context).
            memory_context: Memorizer context block injected into the system prompt.

        Returns:
            ThoughtResult with the response and, when a tool ran, its name/output.
        """
        await self.hud("thinking", detail="reasoning about response")

        messages = [
            {"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
        ]
        for msg in history[-12:]:
            messages.append(msg)
        messages.append({"role": "system", "content": f"Input perception: {command.instruction}"})
        messages = self.trim_context(messages)

        await self.hud("context", messages=messages, tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct)

        response = await llm_call(self.model, messages)
        log.info(f"[thinker] response: {response[:200]}")

        # Check if Thinker wants to use a tool
        tool_call = self._parse_tool_call(response)
        if tool_call:
            tool_name, code = tool_call
            await self.hud("tool_call", tool=tool_name, code=code[:200])
            log.info(f"[thinker] calling tool: {tool_name}")

            if tool_name == "python":
                # Run the blocking subprocess off the event loop.
                # (asyncio.to_thread replaces the deprecated get_event_loop() dance.)
                tool_output = await asyncio.to_thread(self._run_python_sync, code)
            else:
                tool_output = f"[unknown tool: {tool_name}]"

            await self.hud("tool_result", tool=tool_name, output=tool_output[:500])
            log.info(f"[thinker] tool output: {tool_output[:200]}")

            # Second LLM call: interpret tool output
            messages.append({"role": "assistant", "content": response})
            messages.append({"role": "system", "content": f"Tool output:\n{tool_output}"})
            messages.append({"role": "user", "content": "Now respond to the user based on the tool output. Be natural and concise."})
            messages = self.trim_context(messages)
            final = await llm_call(self.model, messages)
            await self.hud("decided", instruction=final[:200])
            return ThoughtResult(response=final, tool_used=tool_name, tool_output=tool_output)

        # No tool needed — pass through
        await self.hud("decided", instruction="direct response (no tools)")
        return ThoughtResult(response=response)

    def _run_python_sync(self, code: str) -> str:
        """Execute *code* with python3 in a temp file (10s timeout); blocking.

        Returns stdout (plus a [stderr: ...] tail on non-zero exit), or an
        [error: ...] string on timeout/failure. Never raises.
        """
        path = None
        try:
            # Write then CLOSE before running: keeps Windows happy (can't
            # re-open an open NamedTemporaryFile there) and flushes reliably.
            with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as f:
                f.write(code)
                path = f.name
            result = subprocess.run(
                ['python3', path],
                capture_output=True, text=True, timeout=10,
                cwd=tempfile.gettempdir()
            )
            output = result.stdout
            if result.returncode != 0:
                output += f"\n[stderr: {result.stderr.strip()}]"
            return output.strip() or "[no output]"
        except subprocess.TimeoutExpired:
            return "[error: execution timed out after 10s]"
        except Exception as e:
            return f"[error: {e}]"
        finally:
            # Fix: the old code used delete=False and never unlinked — one
            # leaked temp file per tool call.
            if path is not None:
                try:
                    os.unlink(path)
                except OSError:
                    pass
||||||
# --- Memorizer Node (S2 — shared state / coordination) ---
|
# --- Memorizer Node (S2 — shared state / coordination) ---
|
||||||
|
|
||||||
class MemorizerNode(Node):
|
class MemorizerNode(Node):
|
||||||
@ -573,6 +716,7 @@ class Runtime:
|
|||||||
self.history: list[dict] = []
|
self.history: list[dict] = []
|
||||||
self.MAX_HISTORY = 40 # sliding window — oldest messages drop off
|
self.MAX_HISTORY = 40 # sliding window — oldest messages drop off
|
||||||
self.input_node = InputNode(send_hud=self._send_hud)
|
self.input_node = InputNode(send_hud=self._send_hud)
|
||||||
|
self.thinker = ThinkerNode(send_hud=self._send_hud)
|
||||||
self.output_node = OutputNode(send_hud=self._send_hud)
|
self.output_node = OutputNode(send_hud=self._send_hud)
|
||||||
self.memorizer = MemorizerNode(send_hud=self._send_hud)
|
self.memorizer = MemorizerNode(send_hud=self._send_hud)
|
||||||
self.sensor = SensorNode(send_hud=self._send_hud)
|
self.sensor = SensorNode(send_hud=self._send_hud)
|
||||||
@ -622,12 +766,29 @@ class Runtime:
|
|||||||
sensor_lines = self.sensor.get_context_lines()
|
sensor_lines = self.sensor.get_context_lines()
|
||||||
mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines)
|
mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines)
|
||||||
|
|
||||||
# Input node decides (with memory context + identity + channel)
|
# Input node perceives (with memory context + identity + channel)
|
||||||
command = await self.input_node.process(
|
command = await self.input_node.process(
|
||||||
envelope, self.history, memory_context=mem_ctx,
|
envelope, self.history, memory_context=mem_ctx,
|
||||||
identity=self.identity, channel=self.channel)
|
identity=self.identity, channel=self.channel)
|
||||||
|
|
||||||
# Output node executes (with memory context + history including user msg)
|
# Thinker node reasons + optionally uses tools
|
||||||
|
thought = await self.thinker.process(command, self.history, memory_context=mem_ctx)
|
||||||
|
|
||||||
|
# If Thinker used a tool, inject its result into the command for Output
|
||||||
|
if thought.tool_used:
|
||||||
|
# Thinker already formulated the response — Output just streams it
|
||||||
|
command = Command(
|
||||||
|
instruction=f"Thinker used {thought.tool_used} and says: {thought.response}",
|
||||||
|
source_text=command.source_text
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Thinker answered directly — Output streams that
|
||||||
|
command = Command(
|
||||||
|
instruction=f"Thinker says: {thought.response}",
|
||||||
|
source_text=command.source_text
|
||||||
|
)
|
||||||
|
|
||||||
|
# Output node streams the response
|
||||||
response = await self.output_node.process(command, self.history, self.ws, memory_context=mem_ctx)
|
response = await self.output_node.process(command, self.history, self.ws, memory_context=mem_ctx)
|
||||||
self.history.append({"role": "assistant", "content": response})
|
self.history.append({"role": "assistant", "content": response})
|
||||||
|
|
||||||
|
|||||||
@ -167,6 +167,12 @@ function handleHud(data) {
|
|||||||
const detail = JSON.stringify(data.state, null, 2);
|
const detail = JSON.stringify(data.state, null, 2);
|
||||||
addTrace(node, 'state', pairs, 'state', detail);
|
addTrace(node, 'state', pairs, 'state', detail);
|
||||||
|
|
||||||
|
} else if (event === 'tool_call') {
|
||||||
|
addTrace(node, 'tool: ' + data.tool, truncate(data.code || '', 80), 'instruction', data.code);
|
||||||
|
|
||||||
|
} else if (event === 'tool_result') {
|
||||||
|
addTrace(node, 'result', truncate(data.output || '', 80), '', data.output);
|
||||||
|
|
||||||
} else if (event === 'error') {
|
} else if (event === 'error') {
|
||||||
addTrace(node, 'error', data.detail || '', 'error');
|
addTrace(node, 'error', data.detail || '', 'error');
|
||||||
|
|
||||||
|
|||||||
@ -15,9 +15,10 @@
|
|||||||
|
|
||||||
<div id="node-metrics">
|
<div id="node-metrics">
|
||||||
<div class="node-meter" id="meter-input"><span class="nm-label">input</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
<div class="node-meter" id="meter-input"><span class="nm-label">input</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||||
|
<div class="node-meter" id="meter-thinker"><span class="nm-label">thinker</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||||
<div class="node-meter" id="meter-output"><span class="nm-label">output</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
<div class="node-meter" id="meter-output"><span class="nm-label">output</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||||
<div class="node-meter" id="meter-memorizer"><span class="nm-label">memorizer</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
<div class="node-meter" id="meter-memorizer"><span class="nm-label">memorizer</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||||
<div class="node-meter" id="meter-sensor"><span class="nm-label">sensor</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
<div class="node-meter" id="meter-sensor"><span class="nm-label">sensor</span><span class="nm-text" style="flex:1">—</span></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div id="main">
|
<div id="main">
|
||||||
|
|||||||
@ -13,6 +13,7 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0;
|
|||||||
#meter-input .nm-label { color: #f59e0b; }
|
#meter-input .nm-label { color: #f59e0b; }
|
||||||
#meter-output .nm-label { color: #34d399; }
|
#meter-output .nm-label { color: #34d399; }
|
||||||
#meter-memorizer .nm-label { color: #c084fc; }
|
#meter-memorizer .nm-label { color: #c084fc; }
|
||||||
|
#meter-thinker .nm-label { color: #fb923c; }
|
||||||
#meter-sensor .nm-label { color: #60a5fa; }
|
#meter-sensor .nm-label { color: #60a5fa; }
|
||||||
.nm-bar { flex: 1; height: 6px; background: #1a1a1a; border-radius: 3px; overflow: hidden; }
|
.nm-bar { flex: 1; height: 6px; background: #1a1a1a; border-radius: 3px; overflow: hidden; }
|
||||||
.nm-fill { height: 100%; width: 0%; border-radius: 3px; transition: width 0.3s, background-color 0.3s; background: #333; }
|
.nm-fill { height: 100%; width: 0%; border-radius: 3px; transition: width 0.3s, background-color 0.3s; background: #333; }
|
||||||
@ -52,6 +53,7 @@ button:hover { background: #1d4ed8; }
|
|||||||
.trace-node.input { color: #f59e0b; }
|
.trace-node.input { color: #f59e0b; }
|
||||||
.trace-node.output { color: #34d399; }
|
.trace-node.output { color: #34d399; }
|
||||||
.trace-node.memorizer { color: #c084fc; }
|
.trace-node.memorizer { color: #c084fc; }
|
||||||
|
.trace-node.thinker { color: #fb923c; }
|
||||||
.trace-node.runtime { color: #60a5fa; }
|
.trace-node.runtime { color: #60a5fa; }
|
||||||
|
|
||||||
.trace-event { color: #888; flex-shrink: 0; min-width: 6rem; }
|
.trace-event { color: #888; flex-shrink: 0; min-width: 6rem; }
|
||||||
|
|||||||
@ -15,10 +15,8 @@ def clear():
|
|||||||
|
|
||||||
# Smoke-test prompts: (message, expected) pairs; expected is None = manual check.
# Covers: greeting, math via python tool, sqlite via python tool, time via sensor.
tests = [
    ("hello!", None),
    ("what is 42 * 137?", None),
    ("create a sqlite db with 5 customers and show them", None),
    ("wie spaet ist es?", None),
]
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user