v0.6.2: Thinker node with python tool execution (S3 Control)
- ThinkerNode: reasons about perception, decides tool use vs direct answer
- Python tool: subprocess execution with 10s timeout
- Auto-detects python code blocks in LLM output and executes them
- Tool call/result visible in trace + HUD
- Thinker meter in frontend (token budget: 4K)
- Flow: Input (perceive) -> Thinker (reason + tools) -> Output (speak)
- Tested: math (42*137=5754), SQLite (create+query), time, greetings

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5c7aece397
commit
8b69e6dd0d
171
agent.py
171
agent.py
@ -147,12 +147,20 @@ class Envelope:
|
||||
|
||||
@dataclass
class Command:
    """Input node's perception — describes what was heard.

    Produced by the Input node and consumed downstream (Thinker/Output).
    """
    instruction: str                             # natural language perception
    source_text: str                             # original user message
    metadata: dict = field(default_factory=dict) # optional extra context
|
||||
|
||||
|
||||
@dataclass
class ThoughtResult:
    """Thinker node's output — either a direct answer or tool results."""
    response: str          # what to tell the user (direct or post-tool)
    tool_used: str = ""    # which tool was called (empty if none)
    tool_output: str = ""  # raw tool output
|
||||
|
||||
|
||||
# --- Base Node ---
|
||||
|
||||
def estimate_tokens(text: str) -> int:
|
||||
@ -464,6 +472,141 @@ Be natural. Be concise. If the user asks you to do something, do it — don't de
|
||||
return full_response
|
||||
|
||||
|
||||
# --- Thinker Node (S3 — control, reasoning, tool use) ---
|
||||
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
class ThinkerNode(Node):
    """S3 control node — reasons about the Input node's perception.

    Either answers the user directly, or emits python code which is
    executed in a subprocess (10s timeout); a second LLM call then turns
    the tool output into a natural-language answer.
    """

    name = "thinker"
    model = "google/gemini-2.0-flash-001"
    max_context_tokens = 4000  # token budget for this node's context window

    SYSTEM = """You are the Thinker node — the brain of this cognitive runtime.
You receive a perception of what the user said. Decide: answer directly or use a tool.

TOOL FORMAT — when you need to compute, query, or create something, respond with ONLY:
TOOL: python
CODE:
```
print("result here")
```

RULES:
- For math, databases, file ops, any computation: write a ```python code block. It WILL be executed.
- For simple conversation (greetings, opinions, knowledge): respond directly as text.
- Your python code runs in a real environment. Use print() for output.
- NEVER describe code — write it. It will run automatically.

{memory_context}"""

    def _parse_tool_call(self, response: str) -> tuple[str, str] | None:
        """Parse tool calls. Supports TOOL: format and auto-detects python code blocks.

        Returns (tool_name, code) or None when the response is a direct answer.
        """
        text = response.strip()

        # Explicit TOOL: format — tool name on the first line, code fenced below
        if text.startswith("TOOL:"):
            lines = text.split("\n")
            tool_name = lines[0].replace("TOOL:", "").strip()
            code_lines = []
            in_code = False
            for line in lines[1:]:
                if line.strip().startswith("```") and not in_code:
                    in_code = True
                    continue
                elif line.strip().startswith("```") and in_code:
                    break
                elif in_code:
                    code_lines.append(line)
                elif line.strip().startswith("CODE:"):
                    continue
            return (tool_name, "\n".join(code_lines)) if code_lines else None

        # Auto-detect: if response is mostly a python code block, execute it
        if "```python" in text or "```py" in text:
            code_lines = []
            in_code = False
            for line in text.split("\n"):
                if ("```python" in line or "```py" in line) and not in_code:
                    in_code = True
                    continue
                elif line.strip() == "```" and in_code:
                    break
                elif in_code:
                    code_lines.append(line)
            # NOTE(review): single-line code blocks are skipped by this
            # threshold — confirm that is intended (a bare `print(42*137)`
            # block would not be auto-executed).
            if len(code_lines) > 1:
                return ("python", "\n".join(code_lines))

        return None

    async def process(self, command: Command, history: list[dict], memory_context: str = "") -> ThoughtResult:
        """Reason about *command*; optionally run a tool, then return a ThoughtResult.

        Emits hud events: thinking, context, tool_call/tool_result (when a
        tool is used), and decided.
        """
        await self.hud("thinking", detail="reasoning about response")

        messages = [
            {"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
        ]
        # Recent conversation only — keeps us inside the 4K token budget
        for msg in history[-12:]:
            messages.append(msg)
        messages.append({"role": "system", "content": f"Input perception: {command.instruction}"})
        messages = self.trim_context(messages)

        await self.hud("context", messages=messages, tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct)

        response = await llm_call(self.model, messages)
        log.info(f"[thinker] response: {response[:200]}")

        # Check if Thinker wants to use a tool
        tool_call = self._parse_tool_call(response)
        if tool_call:
            tool_name, code = tool_call
            await self.hud("tool_call", tool=tool_name, code=code[:200])
            log.info(f"[thinker] calling tool: {tool_name}")

            if tool_name == "python":
                # Blocking subprocess work goes to a thread executor.
                # get_running_loop() is the correct call inside a coroutine
                # (get_event_loop() is deprecated here since Python 3.10).
                loop = asyncio.get_running_loop()
                tool_output = await loop.run_in_executor(None, self._run_python_sync, code)
            else:
                tool_output = f"[unknown tool: {tool_name}]"

            await self.hud("tool_result", tool=tool_name, output=tool_output[:500])
            log.info(f"[thinker] tool output: {tool_output[:200]}")

            # Second LLM call: interpret tool output
            messages.append({"role": "assistant", "content": response})
            messages.append({"role": "system", "content": f"Tool output:\n{tool_output}"})
            messages.append({"role": "user", "content": "Now respond to the user based on the tool output. Be natural and concise."})
            messages = self.trim_context(messages)
            final = await llm_call(self.model, messages)
            await self.hud("decided", instruction=final[:200])
            return ThoughtResult(response=final, tool_used=tool_name, tool_output=tool_output)

        # No tool needed — pass through
        await self.hud("decided", instruction="direct response (no tools)")
        return ThoughtResult(response=response)

    def _run_python_sync(self, code: str) -> str:
        """Sync wrapper for subprocess execution (runs in a thread executor).

        Writes *code* to a temp script, runs it with a 10s timeout, and
        returns stdout (plus stderr on non-zero exit). The temp file is
        always removed — the original leaked it (delete=False, no unlink).
        """
        import os  # local import: only needed for cleanup here
        script_path = None
        try:
            # delete=False so the interpreter can open the file by name;
            # close it before running (required on Windows, harmless elsewhere).
            with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as f:
                f.write(code)
                script_path = f.name
            result = subprocess.run(
                ['python3', script_path],
                capture_output=True, text=True, timeout=10,
                cwd=tempfile.gettempdir()
            )
            output = result.stdout
            if result.returncode != 0:
                output += f"\n[stderr: {result.stderr.strip()}]"
            return output.strip() or "[no output]"
        except subprocess.TimeoutExpired:
            return "[error: execution timed out after 10s]"
        except Exception as e:
            return f"[error: {e}]"
        finally:
            if script_path:
                try:
                    os.unlink(script_path)
                except OSError:
                    pass
|
||||
|
||||
|
||||
# --- Memorizer Node (S2 — shared state / coordination) ---
|
||||
|
||||
class MemorizerNode(Node):
|
||||
@ -573,6 +716,7 @@ class Runtime:
|
||||
self.history: list[dict] = []
|
||||
self.MAX_HISTORY = 40 # sliding window — oldest messages drop off
|
||||
self.input_node = InputNode(send_hud=self._send_hud)
|
||||
self.thinker = ThinkerNode(send_hud=self._send_hud)
|
||||
self.output_node = OutputNode(send_hud=self._send_hud)
|
||||
self.memorizer = MemorizerNode(send_hud=self._send_hud)
|
||||
self.sensor = SensorNode(send_hud=self._send_hud)
|
||||
@ -622,12 +766,29 @@ class Runtime:
|
||||
sensor_lines = self.sensor.get_context_lines()
|
||||
mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines)
|
||||
|
||||
# Input node decides (with memory context + identity + channel)
|
||||
# Input node perceives (with memory context + identity + channel)
|
||||
command = await self.input_node.process(
|
||||
envelope, self.history, memory_context=mem_ctx,
|
||||
identity=self.identity, channel=self.channel)
|
||||
|
||||
# Output node executes (with memory context + history including user msg)
|
||||
# Thinker node reasons + optionally uses tools
|
||||
thought = await self.thinker.process(command, self.history, memory_context=mem_ctx)
|
||||
|
||||
# If Thinker used a tool, inject its result into the command for Output
|
||||
if thought.tool_used:
|
||||
# Thinker already formulated the response — Output just streams it
|
||||
command = Command(
|
||||
instruction=f"Thinker used {thought.tool_used} and says: {thought.response}",
|
||||
source_text=command.source_text
|
||||
)
|
||||
else:
|
||||
# Thinker answered directly — Output streams that
|
||||
command = Command(
|
||||
instruction=f"Thinker says: {thought.response}",
|
||||
source_text=command.source_text
|
||||
)
|
||||
|
||||
# Output node streams the response
|
||||
response = await self.output_node.process(command, self.history, self.ws, memory_context=mem_ctx)
|
||||
self.history.append({"role": "assistant", "content": response})
|
||||
|
||||
|
||||
@ -167,6 +167,12 @@ function handleHud(data) {
|
||||
const detail = JSON.stringify(data.state, null, 2);
|
||||
addTrace(node, 'state', pairs, 'state', detail);
|
||||
|
||||
} else if (event === 'tool_call') {
|
||||
addTrace(node, 'tool: ' + data.tool, truncate(data.code || '', 80), 'instruction', data.code);
|
||||
|
||||
} else if (event === 'tool_result') {
|
||||
addTrace(node, 'result', truncate(data.output || '', 80), '', data.output);
|
||||
|
||||
} else if (event === 'error') {
|
||||
addTrace(node, 'error', data.detail || '', 'error');
|
||||
|
||||
|
||||
@ -15,9 +15,10 @@
|
||||
|
||||
<div id="node-metrics">
|
||||
<div class="node-meter" id="meter-input"><span class="nm-label">input</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||
<div class="node-meter" id="meter-thinker"><span class="nm-label">thinker</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||
<div class="node-meter" id="meter-output"><span class="nm-label">output</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||
<div class="node-meter" id="meter-memorizer"><span class="nm-label">memorizer</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||
<div class="node-meter" id="meter-sensor"><span class="nm-label">sensor</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||
<div class="node-meter" id="meter-sensor"><span class="nm-label">sensor</span><span class="nm-text" style="flex:1">—</span></div>
|
||||
</div>
|
||||
|
||||
<div id="main">
|
||||
|
||||
@ -13,6 +13,7 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0;
|
||||
#meter-input .nm-label { color: #f59e0b; }
|
||||
#meter-output .nm-label { color: #34d399; }
|
||||
#meter-memorizer .nm-label { color: #c084fc; }
|
||||
#meter-thinker .nm-label { color: #fb923c; }
|
||||
#meter-sensor .nm-label { color: #60a5fa; }
|
||||
.nm-bar { flex: 1; height: 6px; background: #1a1a1a; border-radius: 3px; overflow: hidden; }
|
||||
.nm-fill { height: 100%; width: 0%; border-radius: 3px; transition: width 0.3s, background-color 0.3s; background: #333; }
|
||||
@ -52,6 +53,7 @@ button:hover { background: #1d4ed8; }
|
||||
.trace-node.input { color: #f59e0b; }
|
||||
.trace-node.output { color: #34d399; }
|
||||
.trace-node.memorizer { color: #c084fc; }
|
||||
.trace-node.thinker { color: #fb923c; }
|
||||
.trace-node.runtime { color: #60a5fa; }
|
||||
|
||||
.trace-event { color: #888; flex-shrink: 0; min-width: 6rem; }
|
||||
|
||||
@ -15,10 +15,8 @@ def clear():
|
||||
|
||||
tests = [
|
||||
("hello!", None),
|
||||
("hey tina hier!", None),
|
||||
("wir gehen gleich in den pub", None),
|
||||
("nico back - schreib mir ein haiku", None),
|
||||
("auf deutsch, mit unseren namen und deinem, dark future tech theme", None),
|
||||
("what is 42 * 137?", None),
|
||||
("create a sqlite db with 5 customers and show them", None),
|
||||
("wie spaet ist es?", None),
|
||||
]
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user