Nico 231f81bc52 v0.8.2: fix pipeline — skip Output for tools, process HUD, inline controls, structured actions
- Thinker tool results stream directly to user, skipping Output node (halves latency)
- ProcessManager process_start/process_done events render as live cards in chat
- UI controls sent before response text, not after
- Button clicks route to handle_action(), skip Input, go straight to Thinker
- Fix Thinker model: gemini-2.5-flash-preview -> gemini-2.5-flash (old ID expired)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 01:43:07 +01:00

167 lines
6.5 KiB
Python

"""Thinker Node: S3 — control, reasoning, tool use."""
import json
import logging
import re
from .base import Node
from ..llm import llm_call
from ..process import ProcessManager
from ..types import Command, ThoughtResult
log = logging.getLogger("runtime")
class ThinkerNode(Node):
    """S3 node: control, reasoning, and tool use.

    Given the Input node's perception of what the user said, decides one of
    three things (possibly combined in a single LLM response):
      - answer directly as text,
      - write a fenced code block that gets executed via the ProcessManager,
      - emit a ```controls JSON block that renders as interactive UI.
    """

    name = "thinker"
    model = "google/gemini-2.5-flash"
    max_context_tokens = 4000

    # Fence tags treated as executable by _parse_tool_call. Kept in one place
    # so _strip_blocks removes exactly what was executed (plus ```controls),
    # instead of echoing raw SQL/shell source back to the user.
    _EXEC_FENCES = "python|py|sql|sqlite|sh|bash|tool_code"

    SYSTEM = """You are the Thinker node — the brain of this cognitive runtime.
You receive a perception of what the user said. Decide: answer directly, use a tool, or show UI controls.
TOOLS — write a ```python code block and it WILL be executed. Use print() for output.
- For math, databases, file ops, any computation: write python. NEVER describe code — write it.
- For simple conversation: respond directly as text.
UI CONTROLS — to show interactive elements, include a JSON block:
```controls
[
{{"type": "table", "data": [...], "columns": ["id", "name", "email"]}},
{{"type": "button", "label": "Add Customer", "action": "add_customer"}},
{{"type": "button", "label": "Refresh", "action": "refresh_customers"}}
]
```
Controls render in the chat. User clicks flow back as actions you can handle.
You can combine text + code + controls in one response.
{memory_context}"""

    def __init__(self, send_hud, process_manager: ProcessManager | None = None):
        """Store the HUD callback (via base) and the optional process manager.

        When no ProcessManager is supplied, tool calls degrade to an
        "[unknown tool]" message instead of executing code.
        """
        super().__init__(send_hud)
        self.pm = process_manager

    def _parse_tool_call(self, response: str) -> tuple[str, str] | None:
        """Extract a (tool_name, code) pair from the model response, or None.

        Two formats are supported:
          1. Explicit "TOOL: <name>" header followed by a fenced code block.
          2. Auto-detection of the first fenced block tagged with any
             executable language (or untagged); SQL-looking code is wrapped
             in a sqlite3 driver script so it runs under the python tool.
        """
        text = response.strip()

        # Format 1: explicit TOOL: header. Collect the first fenced block
        # after the header; lines before the fence (e.g. "CODE:") are skipped.
        if text.startswith("TOOL:"):
            lines = text.split("\n")
            tool_name = lines[0].replace("TOOL:", "").strip()
            code_lines: list[str] = []
            in_code = False
            for line in lines[1:]:
                stripped = line.strip()
                if stripped.startswith("```"):
                    if in_code:
                        break  # closing fence — stop collecting
                    in_code = True
                    continue
                if in_code:
                    code_lines.append(line)
            return (tool_name, "\n".join(code_lines)) if code_lines else None

        # Format 2: first fenced block with an executable (or absent) tag.
        block_match = re.search(
            rf'```(?:{self._EXEC_FENCES})?\s*\n(.*?)```', text, re.DOTALL
        )
        if not block_match:
            return None
        code = block_match.group(1).strip()
        if not code:
            return None

        # Heuristic: run SQL through sqlite3 rather than as bare python.
        # NOTE(review): the SELECT+CREATE conjunction looks like it was meant
        # to be broader (or an `or`) — kept as-is to preserve behavior.
        looks_like_sql = (
            "```sql" in text
            or "```sqlite" in text
            or ("SELECT" in code.upper() and "CREATE" in code.upper())
        )
        if looks_like_sql:
            # Executed in a child process: runs every statement, then dumps
            # all tables so the model has concrete output to narrate.
            wrapped = f'''import sqlite3
conn = sqlite3.connect("/tmp/cog_db.sqlite")
cursor = conn.cursor()
for stmt in """{code}""".split(";"):
    stmt = stmt.strip()
    if stmt:
        cursor.execute(stmt)
conn.commit()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
tables = cursor.fetchall()
for t in tables:
    cursor.execute(f"SELECT * FROM {{t[0]}}")
    rows = cursor.fetchall()
    cols = [d[0] for d in cursor.description]
    print(f"Table: {{t[0]}}")
    print(" | ".join(cols))
    for row in rows:
        print(" | ".join(str(c) for c in row))
conn.close()'''
            return ("python", wrapped)
        return ("python", code)

    def _parse_controls(self, response: str) -> list[dict]:
        """Extract and JSON-decode every ```controls block in the response.

        Malformed JSON inside a block is silently skipped — a bad control
        spec should degrade to plain text, not crash the turn.
        """
        controls: list[dict] = []
        if "```controls" not in response:
            return controls
        for part in response.split("```controls")[1:]:
            end = part.find("```")
            if end == -1:
                continue
            try:
                controls.extend(json.loads(part[:end].strip()))
            except json.JSONDecodeError:
                pass
        return controls

    def _strip_blocks(self, response: str) -> str:
        """Remove executed code fences and ```controls blocks, leaving prose.

        Strips every fence tag that _parse_tool_call treats as executable
        (previously only python/py/controls, which leaked raw SQL/shell
        source into the user-facing text).
        """
        text = re.sub(
            rf'```(?:{self._EXEC_FENCES}|controls).*?```',
            '',
            response,
            flags=re.DOTALL,
        )
        return text.strip()

    async def process(self, command: Command, history: list[dict], memory_context: str = "") -> ThoughtResult:
        """Run one think step: LLM call, optional tool round-trip, controls.

        Args:
            command: perception from the Input node; only `.instruction` is read.
            history: prior chat messages; the last 12 are included as context.
            memory_context: text interpolated into the system prompt.

        Returns:
            ThoughtResult with the user-facing text, any executed tool's
            name/output, and accumulated UI controls.
        """
        await self.hud("thinking", detail="reasoning about response")

        messages = [
            {"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
        ]
        messages.extend(history[-12:])
        messages.append({"role": "system", "content": f"Input perception: {command.instruction}"})
        messages = self.trim_context(messages)
        await self.hud("context", messages=messages, tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct)

        response = await llm_call(self.model, messages)
        log.info(f"[thinker] response: {response[:200]}")

        # Controls from the first response are surfaced immediately so the UI
        # renders before (or alongside) any tool execution.
        controls = self._parse_controls(response)
        if controls:
            await self.hud("controls", controls=controls)

        tool_call = self._parse_tool_call(response)
        if tool_call:
            tool_name, code = tool_call
            if self.pm and tool_name == "python":
                proc = await self.pm.execute(tool_name, code)
                tool_output = "\n".join(proc.output_lines)
            else:
                tool_output = f"[unknown tool: {tool_name}]"
            log.info(f"[thinker] tool output: {tool_output[:200]}")

            # Second LLM round: let the model narrate the tool output.
            messages.append({"role": "assistant", "content": response})
            messages.append({"role": "system", "content": f"Tool output:\n{tool_output}"})
            messages.append({"role": "user", "content": "Respond to the user based on the tool output. If showing data, include a ```controls block with a table. Be natural and concise."})
            messages = self.trim_context(messages)
            final = await llm_call(self.model, messages)

            more_controls = self._parse_controls(final)
            if more_controls:
                controls.extend(more_controls)
                await self.hud("controls", controls=more_controls)

            # Fall back to the raw final text if stripping leaves nothing,
            # matching the direct-response branch below.
            clean_text = self._strip_blocks(final) or final
            await self.hud("decided", instruction=clean_text[:200])
            return ThoughtResult(response=clean_text, tool_used=tool_name,
                                 tool_output=tool_output, controls=controls)

        clean_text = self._strip_blocks(response) or response
        await self.hud("decided", instruction="direct response (no tools)")
        return ThoughtResult(response=clean_text, controls=controls)