v0.6.2: Thinker node with python tool execution (S3 Control)
- ThinkerNode: reasons about perception, decides tool use vs direct answer
- Python tool: subprocess execution with 10s timeout
- Auto-detects python code blocks in LLM output and executes them
- Tool call/result visible in trace + HUD
- Thinker meter in frontend (token budget: 4K)
- Flow: Input (perceive) -> Thinker (reason + tools) -> Output (speak)
- Tested: math (42*137=5754), SQLite (create+query), time, greetings

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5c7aece397
commit
8b69e6dd0d
171
agent.py
171
agent.py
@ -147,12 +147,20 @@ class Envelope:
|
||||
|
||||
@dataclass
class Command:
    """Input node's perception — describes what was heard.

    Produced by the Input node and consumed downstream (Thinker/Output).
    """
    instruction: str                             # natural language perception
    source_text: str                             # original user message
    metadata: dict = field(default_factory=dict) # optional extra context
|
||||
|
||||
|
||||
@dataclass
class ThoughtResult:
    """Thinker node's output — either a direct answer or tool results."""
    response: str          # what to tell the user (direct or post-tool)
    tool_used: str = ""    # which tool was called (empty if none)
    tool_output: str = ""  # raw tool output
|
||||
|
||||
|
||||
# --- Base Node ---
|
||||
|
||||
def estimate_tokens(text: str) -> int:
|
||||
@ -464,6 +472,141 @@ Be natural. Be concise. If the user asks you to do something, do it — don't de
|
||||
return full_response
|
||||
|
||||
|
||||
# --- Thinker Node (S3 — control, reasoning, tool use) ---
|
||||
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
class ThinkerNode(Node):
    """S3 control node — reasons about the Input node's perception.

    Either answers the user directly, or emits python code which is
    executed in a subprocess (10s timeout); a second LLM call then turns
    the tool output into a natural-language answer.
    """

    name = "thinker"
    model = "google/gemini-2.0-flash-001"
    max_context_tokens = 4000  # token budget for this node's context window

    SYSTEM = """You are the Thinker node — the brain of this cognitive runtime.
You receive a perception of what the user said. Decide: answer directly or use a tool.

TOOL FORMAT — when you need to compute, query, or create something, respond with ONLY:
TOOL: python
CODE:
```
print("result here")
```

RULES:
- For math, databases, file ops, any computation: write a ```python code block. It WILL be executed.
- For simple conversation (greetings, opinions, knowledge): respond directly as text.
- Your python code runs in a real environment. Use print() for output.
- NEVER describe code — write it. It will run automatically.

{memory_context}"""

    def _parse_tool_call(self, response: str) -> tuple[str, str] | None:
        """Parse tool calls. Supports TOOL: format and auto-detects python code blocks.

        Returns (tool_name, code) or None when the response is a direct answer.
        """
        text = response.strip()

        # Explicit TOOL: format — tool name on the first line, code fenced below
        if text.startswith("TOOL:"):
            lines = text.split("\n")
            tool_name = lines[0].replace("TOOL:", "").strip()
            code_lines = []
            in_code = False
            for line in lines[1:]:
                if line.strip().startswith("```") and not in_code:
                    in_code = True
                    continue
                elif line.strip().startswith("```") and in_code:
                    break
                elif in_code:
                    code_lines.append(line)
                elif line.strip().startswith("CODE:"):
                    continue
            return (tool_name, "\n".join(code_lines)) if code_lines else None

        # Auto-detect: if response is mostly a python code block, execute it
        if "```python" in text or "```py" in text:
            code_lines = []
            in_code = False
            for line in text.split("\n"):
                if ("```python" in line or "```py" in line) and not in_code:
                    in_code = True
                    continue
                elif line.strip() == "```" and in_code:
                    break
                elif in_code:
                    code_lines.append(line)
            # NOTE(review): single-line code blocks are skipped by this
            # threshold — confirm that is intended (a bare `print(42*137)`
            # block would not be auto-executed).
            if len(code_lines) > 1:
                return ("python", "\n".join(code_lines))

        return None

    async def process(self, command: Command, history: list[dict], memory_context: str = "") -> ThoughtResult:
        """Reason about *command*; optionally run a tool, then return a ThoughtResult.

        Emits hud events: thinking, context, tool_call/tool_result (when a
        tool is used), and decided.
        """
        await self.hud("thinking", detail="reasoning about response")

        messages = [
            {"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
        ]
        # Recent conversation only — keeps us inside the 4K token budget
        for msg in history[-12:]:
            messages.append(msg)
        messages.append({"role": "system", "content": f"Input perception: {command.instruction}"})
        messages = self.trim_context(messages)

        await self.hud("context", messages=messages, tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct)

        response = await llm_call(self.model, messages)
        log.info(f"[thinker] response: {response[:200]}")

        # Check if Thinker wants to use a tool
        tool_call = self._parse_tool_call(response)
        if tool_call:
            tool_name, code = tool_call
            await self.hud("tool_call", tool=tool_name, code=code[:200])
            log.info(f"[thinker] calling tool: {tool_name}")

            if tool_name == "python":
                # Blocking subprocess work goes to a thread executor.
                # get_running_loop() is the correct call inside a coroutine
                # (get_event_loop() is deprecated here since Python 3.10).
                loop = asyncio.get_running_loop()
                tool_output = await loop.run_in_executor(None, self._run_python_sync, code)
            else:
                tool_output = f"[unknown tool: {tool_name}]"

            await self.hud("tool_result", tool=tool_name, output=tool_output[:500])
            log.info(f"[thinker] tool output: {tool_output[:200]}")

            # Second LLM call: interpret tool output
            messages.append({"role": "assistant", "content": response})
            messages.append({"role": "system", "content": f"Tool output:\n{tool_output}"})
            messages.append({"role": "user", "content": "Now respond to the user based on the tool output. Be natural and concise."})
            messages = self.trim_context(messages)
            final = await llm_call(self.model, messages)
            await self.hud("decided", instruction=final[:200])
            return ThoughtResult(response=final, tool_used=tool_name, tool_output=tool_output)

        # No tool needed — pass through
        await self.hud("decided", instruction="direct response (no tools)")
        return ThoughtResult(response=response)

    def _run_python_sync(self, code: str) -> str:
        """Sync wrapper for subprocess execution (runs in a thread executor).

        Writes *code* to a temp script, runs it with a 10s timeout, and
        returns stdout (plus stderr on non-zero exit). The temp file is
        always removed — the original leaked it (delete=False, no unlink).
        """
        import os  # local import: only needed for cleanup here
        script_path = None
        try:
            # delete=False so the interpreter can open the file by name;
            # close it before running (required on Windows, harmless elsewhere).
            with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as f:
                f.write(code)
                script_path = f.name
            result = subprocess.run(
                ['python3', script_path],
                capture_output=True, text=True, timeout=10,
                cwd=tempfile.gettempdir()
            )
            output = result.stdout
            if result.returncode != 0:
                output += f"\n[stderr: {result.stderr.strip()}]"
            return output.strip() or "[no output]"
        except subprocess.TimeoutExpired:
            return "[error: execution timed out after 10s]"
        except Exception as e:
            return f"[error: {e}]"
        finally:
            if script_path:
                try:
                    os.unlink(script_path)
                except OSError:
                    pass
|
||||
|
||||
|
||||
# --- Memorizer Node (S2 — shared state / coordination) ---
|
||||
|
||||
class MemorizerNode(Node):
|
||||
@ -573,6 +716,7 @@ class Runtime:
|
||||
self.history: list[dict] = []
|
||||
self.MAX_HISTORY = 40 # sliding window — oldest messages drop off
|
||||
self.input_node = InputNode(send_hud=self._send_hud)
|
||||
self.thinker = ThinkerNode(send_hud=self._send_hud)
|
||||
self.output_node = OutputNode(send_hud=self._send_hud)
|
||||
self.memorizer = MemorizerNode(send_hud=self._send_hud)
|
||||
self.sensor = SensorNode(send_hud=self._send_hud)
|
||||
@ -622,12 +766,29 @@ class Runtime:
|
||||
sensor_lines = self.sensor.get_context_lines()
|
||||
mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines)
|
||||
|
||||
# Input node decides (with memory context + identity + channel)
|
||||
# Input node perceives (with memory context + identity + channel)
|
||||
command = await self.input_node.process(
|
||||
envelope, self.history, memory_context=mem_ctx,
|
||||
identity=self.identity, channel=self.channel)
|
||||
|
||||
# Output node executes (with memory context + history including user msg)
|
||||
# Thinker node reasons + optionally uses tools
|
||||
thought = await self.thinker.process(command, self.history, memory_context=mem_ctx)
|
||||
|
||||
# If Thinker used a tool, inject its result into the command for Output
|
||||
if thought.tool_used:
|
||||
# Thinker already formulated the response — Output just streams it
|
||||
command = Command(
|
||||
instruction=f"Thinker used {thought.tool_used} and says: {thought.response}",
|
||||
source_text=command.source_text
|
||||
)
|
||||
else:
|
||||
# Thinker answered directly — Output streams that
|
||||
command = Command(
|
||||
instruction=f"Thinker says: {thought.response}",
|
||||
source_text=command.source_text
|
||||
)
|
||||
|
||||
# Output node streams the response
|
||||
response = await self.output_node.process(command, self.history, self.ws, memory_context=mem_ctx)
|
||||
self.history.append({"role": "assistant", "content": response})
|
||||
|
||||
|
||||
@ -167,6 +167,12 @@ function handleHud(data) {
|
||||
const detail = JSON.stringify(data.state, null, 2);
|
||||
addTrace(node, 'state', pairs, 'state', detail);
|
||||
|
||||
} else if (event === 'tool_call') {
|
||||
addTrace(node, 'tool: ' + data.tool, truncate(data.code || '', 80), 'instruction', data.code);
|
||||
|
||||
} else if (event === 'tool_result') {
|
||||
addTrace(node, 'result', truncate(data.output || '', 80), '', data.output);
|
||||
|
||||
} else if (event === 'error') {
|
||||
addTrace(node, 'error', data.detail || '', 'error');
|
||||
|
||||
|
||||
@ -15,9 +15,10 @@
|
||||
|
||||
<div id="node-metrics">
|
||||
<div class="node-meter" id="meter-input"><span class="nm-label">input</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||
<div class="node-meter" id="meter-thinker"><span class="nm-label">thinker</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||
<div class="node-meter" id="meter-output"><span class="nm-label">output</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||
<div class="node-meter" id="meter-memorizer"><span class="nm-label">memorizer</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||
<div class="node-meter" id="meter-sensor"><span class="nm-label">sensor</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text">—</span></div>
|
||||
<div class="node-meter" id="meter-sensor"><span class="nm-label">sensor</span><span class="nm-text" style="flex:1">—</span></div>
|
||||
</div>
|
||||
|
||||
<div id="main">
|
||||
|
||||
@ -13,6 +13,7 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0;
|
||||
#meter-input .nm-label { color: #f59e0b; }
|
||||
#meter-output .nm-label { color: #34d399; }
|
||||
#meter-memorizer .nm-label { color: #c084fc; }
|
||||
#meter-thinker .nm-label { color: #fb923c; }
|
||||
#meter-sensor .nm-label { color: #60a5fa; }
|
||||
.nm-bar { flex: 1; height: 6px; background: #1a1a1a; border-radius: 3px; overflow: hidden; }
|
||||
.nm-fill { height: 100%; width: 0%; border-radius: 3px; transition: width 0.3s, background-color 0.3s; background: #333; }
|
||||
@ -52,6 +53,7 @@ button:hover { background: #1d4ed8; }
|
||||
.trace-node.input { color: #f59e0b; }
|
||||
.trace-node.output { color: #34d399; }
|
||||
.trace-node.memorizer { color: #c084fc; }
|
||||
.trace-node.thinker { color: #fb923c; }
|
||||
.trace-node.runtime { color: #60a5fa; }
|
||||
|
||||
.trace-event { color: #888; flex-shrink: 0; min-width: 6rem; }
|
||||
|
||||
@ -15,10 +15,8 @@ def clear():
|
||||
|
||||
tests = [
|
||||
("hello!", None),
|
||||
("hey tina hier!", None),
|
||||
("wir gehen gleich in den pub", None),
|
||||
("nico back - schreib mir ein haiku", None),
|
||||
("auf deutsch, mit unseren namen und deinem, dark future tech theme", None),
|
||||
("what is 42 * 137?", None),
|
||||
("create a sqlite db with 5 customers and show them", None),
|
||||
("wie spaet ist es?", None),
|
||||
]
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user