From b6ca02f8643a2d500afd0b7451f5321b7361b254 Mon Sep 17 00:00:00 2001 From: Nico Date: Sat, 28 Mar 2026 14:12:15 +0100 Subject: [PATCH] v0.9.2: dedicated UI node, strict node roles, markdown rendering 6-node pipeline: Input -> Thinker -> Output (voice) + UI (screen) in parallel - Output: text only (markdown, emoji). Never emits HTML or controls. - UI: dedicated node for labels, buttons, tables. Tracks workspace state. Replaces entire workspace on each update. Runs parallel with Output. - Input: strict one-sentence perception. No more hallucinating responses. - Thinker: controls removed from prompt, focuses on reasoning + tools. - Frontend: markdown rendered in chat (bold, italic, code blocks, lists). Label control type added. UI node meter in top bar. Co-Authored-By: Claude Opus 4.6 (1M context) --- agent/nodes/__init__.py | 3 +- agent/nodes/input.py | 19 +++++--- agent/nodes/output.py | 21 +++++---- agent/nodes/thinker.py | 57 +++++----------------- agent/nodes/ui.py | 102 ++++++++++++++++++++++++++++++++++++++++ agent/runtime.py | 44 ++++++++--------- static/app.js | 45 ++++++++++++++++-- static/index.html | 1 + static/style.css | 15 ++++++ 9 files changed, 221 insertions(+), 86 deletions(-) create mode 100644 agent/nodes/ui.py diff --git a/agent/nodes/__init__.py b/agent/nodes/__init__.py index b5890e7..99777ab 100644 --- a/agent/nodes/__init__.py +++ b/agent/nodes/__init__.py @@ -5,5 +5,6 @@ from .input import InputNode from .output import OutputNode from .thinker import ThinkerNode from .memorizer import MemorizerNode +from .ui import UINode -__all__ = ["SensorNode", "InputNode", "OutputNode", "ThinkerNode", "MemorizerNode"] +__all__ = ["SensorNode", "InputNode", "OutputNode", "ThinkerNode", "MemorizerNode", "UINode"] diff --git a/agent/nodes/input.py b/agent/nodes/input.py index b3b978b..183af9c 100644 --- a/agent/nodes/input.py +++ b/agent/nodes/input.py @@ -16,14 +16,19 @@ class InputNode(Node): SYSTEM = """You are the Input node — the ear of this 
cognitive runtime. -Listener context: -- Authenticated user: {identity} -- Channel: {channel} (Chrome browser on Nico's Windows PC, in his room at home) -- Physical: private space, Nico lives with Tina — she may use this session too -- Security: single-user account, shared physical space — other voices are trusted household +Listener: {identity} on {channel} -Your job: describe what you heard. Who spoke, what they want, what tone, what context matters. -ONE sentence. No content, no response — just your perception of what came through. +YOUR ONLY JOB: Describe what you heard in ONE short sentence. +- Who spoke, what they want, what tone. +- Example: "Nico asks what time it is, casual tone." +- Example: "Nico wants to create a database with customer data, direct request." +- Example: "Nico reports a UI bug — he can't see a value updating, frustrated tone." + +STRICT RULES: +- ONLY output a single perception sentence. Nothing else. +- NEVER generate a response, code, HTML, or suggestions. +- NEVER answer the user's question — that's not your job. +- NEVER write more than one sentence. {memory_context}""" diff --git a/agent/nodes/output.py b/agent/nodes/output.py index de84e89..517ae53 100644 --- a/agent/nodes/output.py +++ b/agent/nodes/output.py @@ -17,20 +17,23 @@ class OutputNode(Node): model = "google/gemini-2.0-flash-001" max_context_tokens = 4000 - SYSTEM = """You are the Output node — the renderer of this cognitive runtime. + SYSTEM = """You are the Output node — the voice of this cognitive runtime. -DEVICE: The user is on a web browser (Chrome, desktop). Your output renders in an HTML chat panel. -You can use markdown: **bold**, *italic*, `code`, ```code blocks```, lists, headers. -The chat panel renders markdown to HTML — use it for structure when helpful. +YOU ARE TEXT ONLY. Your output goes to a chat bubble. 
You can use: +- Markdown: **bold**, *italic*, `code`, ```code blocks```, lists, headers +- Emojis when they add warmth or clarity +- Short, structured text (bullet points, numbered lists) -YOUR JOB: Transform the Thinker's reasoning into a polished, user-facing response. -- The Thinker reasons and may use tools. You receive its output and render it for the human. +NEVER output HTML, buttons, tables, labels, or any UI elements. +A separate UI node handles all interactive elements — you just speak. + +YOUR JOB: Transform the Thinker's reasoning into a natural, human-readable text response. - NEVER echo internal node names, perceptions, or system details. - NEVER say "the Thinker decided..." or "I'll process..." — just deliver the answer. -- If the Thinker ran a tool and got output, weave the results into a natural response. -- If the Thinker gave a direct answer, refine and format it — don't just repeat it. +- If the Thinker ran a tool and got output, summarize the results in text. +- If the Thinker gave a direct answer, refine the wording — don't just repeat verbatim. - Keep the user's language — if they wrote German, respond in German. -- Be concise but complete. Use formatting to make data scannable. +- Be concise. Don't describe data that the UI node will show as a table. {memory_context}""" diff --git a/agent/nodes/thinker.py b/agent/nodes/thinker.py index 2f48581..1c7fd04 100644 --- a/agent/nodes/thinker.py +++ b/agent/nodes/thinker.py @@ -18,23 +18,13 @@ class ThinkerNode(Node): max_context_tokens = 4000 SYSTEM = """You are the Thinker node — the brain of this cognitive runtime. -You receive a perception of what the user said. Decide: answer directly, use a tool, or show UI controls. +You receive a perception of what the user said. Decide: answer directly or use a tool. TOOLS — write a ```python code block and it WILL be executed. Use print() for output. - For math, databases, file ops, any computation: write python. NEVER describe code — write it. 
- For simple conversation: respond directly as text. -UI CONTROLS — to show interactive elements, include a JSON block: -```controls -[ - {{"type": "table", "data": [...], "columns": ["id", "name", "email"]}}, - {{"type": "button", "label": "Add Customer", "action": "add_customer"}}, - {{"type": "button", "label": "Refresh", "action": "refresh_customers"}} -] -``` -Controls render in the chat. User clicks flow back as actions you can handle. - -You can combine text + code + controls in one response. +A separate UI node handles all visual controls (buttons, tables). Just focus on reasoning and content. {memory_context}""" @@ -94,24 +84,9 @@ conn.close()''' return None - def _parse_controls(self, response: str) -> list[dict]: - """Extract ```controls JSON blocks from response.""" - controls = [] - if "```controls" not in response: - return controls - parts = response.split("```controls") - for part in parts[1:]: - end = part.find("```") - if end != -1: - try: - controls.extend(json.loads(part[:end].strip())) - except json.JSONDecodeError: - pass - return controls - - def _strip_blocks(self, response: str) -> str: - """Remove code and control blocks, return plain text.""" - text = re.sub(r'```(?:python|py|controls).*?```', '', response, flags=re.DOTALL) + def _strip_code_blocks(self, response: str) -> str: + """Remove code blocks, return plain text.""" + text = re.sub(r'```(?:python|py|sql|sqlite|sh|bash|tool_code).*?```', '', response, flags=re.DOTALL) return text.strip() async def process(self, command: Command, history: list[dict], memory_context: str = "") -> ThoughtResult: @@ -129,12 +104,10 @@ conn.close()''' max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct) response = await llm_call(self.model, messages) + if not response: + response = "[no response from LLM]" log.info(f"[thinker] response: {response[:200]}") - controls = self._parse_controls(response) - if controls: - await self.hud("controls", controls=controls) - tool_call = 
class UINode(Node):
    """Decide which interactive controls the workspace panel should show.

    The node asks the LLM for a JSON array of control descriptions
    (labels, buttons, tables) based on the Thinker's result and the last
    few history messages. It remembers the last non-empty control set in
    ``current_controls`` and feeds it back into the prompt so the model
    can re-emit everything that should stay visible.
    """

    name = "ui"
    model = "google/gemini-2.0-flash-001"
    max_context_tokens = 3000

    SYSTEM = """You are the UI node of a cognitive agent runtime.

You render interactive elements to a workspace panel in the browser. A separate Output node handles all text — you NEVER write prose, explanations, or messages.

YOUR OUTPUT: A JSON array of UI elements, or [] if nothing to show.

ELEMENT TYPES:

label — display a value:
  {{"type": "label", "id": "unique_id", "text": "Label Text", "value": "current value"}}

button — clickable action:
  {{"type": "button", "label": "Short Label", "action": "action_name", "payload": {{"key": "value"}}}}

table — structured data:
  {{"type": "table", "columns": ["col1", "col2"], "data": [{{"col1": "val", "col2": "val"}}]}}

RULES:
- Output ONLY a valid JSON array. No text, no markdown, no explanation.
- Labels: show key values the user asked about or that resulted from tool execution.
- Buttons: offer clear follow-up actions. Keep labels 2-4 words. Action is snake_case.
- Tables: when tool output contains structured/tabular data.
- Return [] when the response is purely conversational with no actionable data.
- Every element you emit REPLACES the entire workspace. Include all elements that should be visible.

CURRENT WORKSPACE:
{current_controls}"""

    def __init__(self, send_hud):
        super().__init__(send_hud)
        # Last non-empty control set emitted by process(); shown to the
        # model as "CURRENT WORKSPACE" on the next call.
        self.current_controls: list[dict] = []

    @staticmethod
    def _parse_controls(raw) -> list[dict]:
        """Turn the raw LLM reply into a list of control dicts.

        Accepts a bare JSON array or one wrapped in a Markdown code fence.
        Returns ``[]`` for an empty/``None`` reply, invalid JSON, or a
        top-level value that is not a list. Array elements that are not
        objects with a string ``"type"`` are dropped, since the renderer
        dispatches on that field.
        """
        text = (raw or "").strip()
        if not text:
            return []

        if text.startswith("```"):
            # Drop the opening fence line (it may carry a language tag).
            _fence, newline, remainder = text.partition("\n")
            text = remainder if newline else text[3:]
        if text.endswith("```"):
            text = text[:-3]
        text = text.strip()
        if not text:
            return []

        try:
            parsed = json.loads(text)
        except json.JSONDecodeError as exc:
            log.error("[ui] parse error: %s, raw: %s", exc, text[:200])
            return []

        if not isinstance(parsed, list):
            return []
        return [
            element for element in parsed
            if isinstance(element, dict) and isinstance(element.get("type"), str)
        ]

    async def process(self, thought: ThoughtResult, history: list[dict],
                      memory_context: str = "") -> list[dict]:
        """Ask the LLM which controls the workspace should show now.

        Args:
            thought: Thinker result; its ``response`` and, when a tool was
                used, ``tool_used`` / ``tool_output`` are passed to the model.
            history: Conversation messages; only the last six are included.
            memory_context: Accepted for signature parity with the other
                nodes; not used by this node.

        Returns:
            The parsed control list, or ``[]`` when the model returned
            nothing usable. ``current_controls`` is only overwritten when
            the list is non-empty.
        """
        await self.hud("thinking", detail="deciding UI controls")

        # Show the model what is currently rendered.
        if self.current_controls:
            ctrl_desc = json.dumps(self.current_controls, indent=2)
        else:
            ctrl_desc = "(empty)"

        messages = [
            {"role": "system", "content": self.SYSTEM.format(current_controls=ctrl_desc)},
        ]
        messages.extend(history[-6:])

        ctx = f"Thinker response: {thought.response}"
        if thought.tool_used:
            ctx += f"\n\nTool: {thought.tool_used}\nTool output:\n{thought.tool_output}"
        messages.append({"role": "system", "content": ctx})
        messages.append({"role": "user", "content": "What UI elements should the workspace show now? Return JSON array."})

        messages = self.trim_context(messages)
        await self.hud("context", messages=messages, tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct)

        raw = await llm_call(self.model, messages)
        # The LLM call may yield nothing; never slice or strip a None.
        log.info("[ui] raw: %s", (raw or "")[:200])

        controls = self._parse_controls(raw)

        if controls:
            self.current_controls = controls
            await self.hud("controls", controls=controls)
            log.info("[ui] emitting %d controls", len(controls))
        else:
            await self.hud("decided", instruction="no controls needed")

        return controls
async def _run_output_and_ui(self, thought, mem_ctx):
    """Run the Output and UI nodes concurrently and return the response text.

    The Output node streams its text to the websocket itself; the UI node
    only returns a control list. Both are awaited to completion, and only
    then are non-empty controls sent to the client as a single
    ``{"type": "controls", ...}`` message.

    A failure in the UI node is logged and treated as "no controls" so it
    cannot discard a response the user has already seen. A failure in the
    Output node is re-raised to the caller.

    Args:
        thought: Thinker result handed to both nodes.
        mem_ctx: Memory context block forwarded as ``memory_context``.

    Returns:
        The response text produced by the Output node.
    """
    # return_exceptions=True lets each node finish independently; without it
    # the first exception propagates while the other task keeps running
    # unobserved in the background.
    response, controls = await asyncio.gather(
        self.output_node.process(thought, self.history, self.ws, memory_context=mem_ctx),
        self.ui_node.process(thought, self.history, memory_context=mem_ctx),
        return_exceptions=True,
    )

    if isinstance(response, BaseException):
        raise response

    if isinstance(controls, BaseException):
        if not isinstance(controls, Exception):
            # Cancellation and similar must not be swallowed.
            raise controls
        logging.getLogger("runtime").error("ui node failed: %s", controls)
        controls = []

    if controls:
        await self.ws.send_text(json.dumps({"type": "controls", "controls": controls}))

    return response
- if thought.controls: - await self.ws.send_text(json.dumps({"type": "controls", "controls": thought.controls})) - - # Output renders Thinker's reasoning into device-appropriate response - response = await self.output_node.process(thought, self.history, self.ws, memory_context=mem_ctx) + # Output (voice) and UI (screen) run in parallel + response = await self._run_output_and_ui(thought, mem_ctx) self.history.append({"role": "assistant", "content": response}) diff --git a/static/app.js b/static/app.js index f9b0879..dbe2c25 100644 --- a/static/app.js +++ b/static/app.js @@ -127,7 +127,11 @@ function connect() { scroll(msgs); } else if (data.type === 'done') { - if (currentEl) currentEl.classList.remove('streaming'); + if (currentEl) { + currentEl.classList.remove('streaming'); + // Render markdown now that streaming is complete + currentEl.innerHTML = renderMarkdown(currentEl.textContent); + } currentEl = null; } else if (data.type === 'controls') { @@ -256,7 +260,7 @@ function renderControls(controls) { btn.textContent = ctrl.label; btn.onclick = () => { if (ws && ws.readyState === 1) { - ws.send(JSON.stringify({ type: 'action', action: ctrl.action, data: ctrl.data || {} })); + ws.send(JSON.stringify({ type: 'action', action: ctrl.action, data: ctrl.payload || ctrl.data || {} })); addTrace('runtime', 'action', ctrl.action); } }; @@ -295,6 +299,12 @@ function renderControls(controls) { } container.appendChild(table); + } else if (ctrl.type === 'label') { + const lbl = document.createElement('div'); + lbl.className = 'control-label'; + lbl.innerHTML = '' + esc(ctrl.text || '') + '' + esc(String(ctrl.value ?? 
'')) + ''; + container.appendChild(lbl); + } else if (ctrl.type === 'process') { const card = document.createElement('div'); card.className = 'process-card ' + (ctrl.status || 'running'); @@ -355,6 +365,30 @@ function updateMeter(node, tokens, maxTokens, fillPct) { function scroll(el) { el.scrollTop = el.scrollHeight; } function esc(s) { const d = document.createElement('span'); d.textContent = s; return d.innerHTML; } + +function renderMarkdown(text) { + // Escape HTML first + let html = esc(text); + // Code blocks (``` ... ```) + html = html.replace(/```(\w*)\n([\s\S]*?)```/g, (_, lang, code) => '
' + code.trim() + '
'); + // Inline code + html = html.replace(/`([^`]+)`/g, '$1'); + // Bold + html = html.replace(/\*\*(.+?)\*\*/g, '$1'); + // Italic + html = html.replace(/\*(.+?)\*/g, '$1'); + // Headers + html = html.replace(/^### (.+)$/gm, '

$1

'); + html = html.replace(/^## (.+)$/gm, '

$1

'); + html = html.replace(/^# (.+)$/gm, '

$1

'); + // Unordered lists + html = html.replace(/^[*-] (.+)$/gm, '
  • $1
  • '); + html = html.replace(/(
  • .*<\/li>\n?)+/g, m => '
      ' + m + '
    '); + // Line breaks (double newline = paragraph break) + html = html.replace(/\n\n/g, '

    '); + html = html.replace(/\n/g, '
    '); + return html; +} function truncate(s, n) { return s.length > n ? s.slice(0, n) + '\u2026' : s; } function addMsg(role, text) { @@ -474,7 +508,7 @@ function dockControls(controls) { btn.textContent = ctrl.label; btn.onclick = () => { if (ws && ws.readyState === 1) { - ws.send(JSON.stringify({ type: 'action', action: ctrl.action, data: ctrl.data || {} })); + ws.send(JSON.stringify({ type: 'action', action: ctrl.action, data: ctrl.payload || ctrl.data || {} })); addTrace('runtime', 'action', ctrl.action); } }; @@ -509,6 +543,11 @@ function dockControls(controls) { table.appendChild(tr); } container.appendChild(table); + } else if (ctrl.type === 'label') { + const lbl = document.createElement('div'); + lbl.className = 'control-label'; + lbl.innerHTML = '' + esc(ctrl.text || '') + '' + esc(String(ctrl.value ?? '')) + ''; + container.appendChild(lbl); } } body.appendChild(container); diff --git a/static/index.html b/static/index.html index 10b1c4f..c83701a 100644 --- a/static/index.html +++ b/static/index.html @@ -18,6 +18,7 @@
    thinker
    output
    memorizer
    +
    ui
    sensor
    diff --git a/static/style.css b/static/style.css index 046739b..fec2579 100644 --- a/static/style.css +++ b/static/style.css @@ -14,6 +14,7 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0; #meter-output .nm-label { color: #34d399; } #meter-memorizer .nm-label { color: #c084fc; } #meter-thinker .nm-label { color: #fb923c; } +#meter-ui .nm-label { color: #34d399; } #meter-sensor .nm-label { color: #60a5fa; } .nm-bar { flex: 1; height: 6px; background: #1a1a1a; border-radius: 3px; overflow: hidden; } .nm-fill { height: 100%; width: 0%; border-radius: 3px; transition: width 0.3s, background-color 0.3s; background: #333; } @@ -34,6 +35,16 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0; .msg.user { align-self: flex-end; background: #2563eb; color: white; } .msg.assistant { align-self: flex-start; background: #1e1e1e; border: 1px solid #333; } .msg.assistant.streaming { border-color: #2563eb; } +.msg.assistant h2, .msg.assistant h3, .msg.assistant h4 { margin: 0.3rem 0 0.2rem; color: #e0e0e0; } +.msg.assistant h2 { font-size: 1rem; } +.msg.assistant h3 { font-size: 0.95rem; } +.msg.assistant h4 { font-size: 0.9rem; } +.msg.assistant strong { color: #fff; } +.msg.assistant code { background: #2a2a3a; padding: 0.1rem 0.3rem; border-radius: 0.2rem; font-size: 0.85em; } +.msg.assistant pre { background: #1a1a2a; padding: 0.5rem; border-radius: 0.3rem; margin: 0.3rem 0; overflow-x: auto; } +.msg.assistant pre code { background: none; padding: 0; } +.msg.assistant ul { margin: 0.2rem 0; padding-left: 1.2rem; } +.msg.assistant li { margin: 0.1rem 0; } /* Input bar */ #input-bar { display: flex; gap: 0.5rem; padding: 0.75rem; background: #111; border-top: 1px solid #222; } @@ -56,6 +67,7 @@ button:hover { background: #1d4ed8; } .trace-node.thinker { color: #fb923c; } .trace-node.runtime { color: #60a5fa; } .trace-node.process { color: #f97316; } +.trace-node.ui { color: #34d399; } .trace-node.sensor { color: 
#60a5fa; } .trace-event { color: #888; flex-shrink: 0; min-width: 6rem; } @@ -70,6 +82,9 @@ button:hover { background: #1d4ed8; } .controls-container { padding: 0.4rem 0; display: flex; flex-wrap: wrap; gap: 0.4rem; align-items: flex-start; } .control-btn { padding: 0.35rem 0.75rem; background: #1e3a5f; color: #60a5fa; border: 1px solid #2563eb; border-radius: 0.3rem; cursor: pointer; font-size: 0.8rem; } .control-btn:hover { background: #2563eb; color: white; } +.control-label { display: flex; justify-content: space-between; align-items: center; padding: 0.3rem 0.5rem; background: #1a1a2e; border-radius: 0.3rem; font-size: 0.8rem; } +.cl-text { color: #888; } +.cl-value { color: #e0e0e0; font-weight: 600; font-family: monospace; } .control-table { width: 100%; border-collapse: collapse; font-size: 0.8rem; background: #111; border-radius: 0.3rem; overflow: hidden; } .control-table th { background: #1a1a2e; color: #a78bfa; padding: 0.3rem 0.5rem; text-align: left; font-weight: 600; border-bottom: 1px solid #333; } .control-table td { padding: 0.25rem 0.5rem; border-bottom: 1px solid #1a1a1a; color: #ccc; }