Nico b6ca02f864 v0.9.2: dedicated UI node, strict node roles, markdown rendering
6-node pipeline: Input -> Thinker -> Output (voice) + UI (screen) in parallel

- Output: text only (markdown, emoji). Never emits HTML or controls.
- UI: dedicated node for labels, buttons, tables. Tracks workspace state.
  Replaces entire workspace on each update. Runs parallel with Output.
- Input: strict one-sentence perception. No more hallucinating responses.
- Thinker: controls removed from prompt, focuses on reasoning + tools.
- Frontend: markdown rendered in chat (bold, italic, code blocks, lists).
  Label control type added. UI node meter in top bar.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 14:12:15 +01:00

103 lines
3.7 KiB
Python

"""UI Node: renders interactive elements to the awareness panel workspace."""
import json
import logging
from .base import Node
from ..llm import llm_call
from ..types import ThoughtResult
# Module-level logger; records from this file are emitted under the "runtime" logger name.
log = logging.getLogger("runtime")
class UINode(Node):
    """Node that asks an LLM which UI elements the workspace panel should show.

    The node keeps the last non-empty list of controls it emitted in
    ``current_controls`` and feeds it back into the system prompt so the model
    can see what is currently rendered.
    """

    name = "ui"
    model = "google/gemini-2.0-flash-001"
    max_context_tokens = 3000

    # Prompt template. Doubled braces are str.format() escapes for literal
    # JSON braces; ``{current_controls}`` is the only substitution field.
    SYSTEM = """You are the UI node of a cognitive agent runtime.
You render interactive elements to a workspace panel in the browser. A separate Output node handles all text — you NEVER write prose, explanations, or messages.
YOUR OUTPUT: A JSON array of UI elements, or [] if nothing to show.
ELEMENT TYPES:
label — display a value:
{{"type": "label", "id": "unique_id", "text": "Label Text", "value": "current value"}}
button — clickable action:
{{"type": "button", "label": "Short Label", "action": "action_name", "payload": {{"key": "value"}}}}
table — structured data:
{{"type": "table", "columns": ["col1", "col2"], "data": [{{"col1": "val", "col2": "val"}}]}}
RULES:
- Output ONLY a valid JSON array. No text, no markdown, no explanation.
- Labels: show key values the user asked about or that resulted from tool execution.
- Buttons: offer clear follow-up actions. Keep labels 2-4 words. Action is snake_case.
- Tables: when tool output contains structured/tabular data.
- Return [] when the response is purely conversational with no actionable data.
- Every element you emit REPLACES the entire workspace. Include all elements that should be visible.
CURRENT WORKSPACE:
{current_controls}"""

    def __init__(self, send_hud):
        """Initialise the node with an empty tracked workspace.

        Args:
            send_hud: Passed straight through to ``Node.__init__``.
        """
        super().__init__(send_hud)
        # Last non-empty control list emitted by process().
        self.current_controls: list[dict] = []

    def _build_messages(self, thought: ThoughtResult, history: list[dict]) -> list[dict]:
        """Assemble the chat messages sent to the model for one UI decision."""
        if self.current_controls:
            workspace = json.dumps(self.current_controls, indent=2)
        else:
            workspace = "(empty)"

        messages = [
            {"role": "system", "content": self.SYSTEM.format(current_controls=workspace)},
        ]
        # Only the six most recent history entries are forwarded.
        messages.extend(history[-6:])

        ctx = f"Thinker response: {thought.response}"
        if thought.tool_used:
            ctx += f"\n\nTool: {thought.tool_used}\nTool output:\n{thought.tool_output}"
        messages.append({"role": "system", "content": ctx})
        messages.append({"role": "user", "content": "What UI elements should the workspace show now? Return JSON array."})
        return messages

    @staticmethod
    def _parse_controls(raw: str) -> list[dict]:
        """Turn the model's raw reply into a list of control dicts.

        A surrounding Markdown code fence (with or without a language tag) is
        stripped first. Anything that is not a JSON array yields ``[]``; array
        elements that are not JSON objects are dropped so the result really is
        a ``list[dict]``.
        """
        text = raw.strip()
        if text.startswith("```"):
            # Drop the opening fence line (it may carry a tag such as "json");
            # a single-line reply only loses the three backticks.
            _, newline, remainder = text.partition("\n")
            text = remainder if newline else text[3:]
            text = text.removesuffix("```").strip()

        try:
            parsed = json.loads(text)
        except (json.JSONDecodeError, RecursionError) as exc:
            log.error("[ui] parse error: %s, raw: %s", exc, text[:200])
            return []

        if not isinstance(parsed, list):
            return []

        controls = [element for element in parsed if isinstance(element, dict)]
        dropped = len(parsed) - len(controls)
        if dropped:
            log.warning("[ui] dropped %d non-object elements", dropped)
        return controls

    async def process(self, thought: ThoughtResult, history: list[dict],
                      memory_context: str = "") -> list[dict]:
        """Decide which UI elements to show for the given thought.

        Args:
            thought: Result of the Thinker node; ``response``, ``tool_used`` and
                ``tool_output`` are read from it.
            history: Chat messages; the last six are included in the prompt.
            memory_context: Accepted for interface compatibility; not used here.

        Returns:
            The parsed list of control dicts, or ``[]`` when the model returned
            nothing usable. An empty result leaves ``current_controls`` as it was.
        """
        await self.hud("thinking", detail="deciding UI controls")

        messages = self.trim_context(self._build_messages(thought, history))
        await self.hud("context", messages=messages, tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct)

        reply = await llm_call(self.model, messages)
        # NOTE(review): llm_call's failure behaviour is not visible here; a
        # falsy reply is treated as an empty string instead of crashing below.
        raw = reply or ""
        log.info("[ui] raw: %s", raw[:200])

        controls = self._parse_controls(raw)
        if not controls:
            await self.hud("decided", instruction="no controls needed")
            return controls

        self.current_controls = controls
        await self.hud("controls", controls=controls)
        log.info("[ui] emitting %d controls", len(controls))
        return controls