v0.8.2: fix pipeline — skip Output for tools, process HUD, inline controls, structured actions
- Thinker tool results stream directly to user, skipping Output node (halves latency) - ProcessManager process_start/process_done events render as live cards in chat - UI controls sent before response text, not after - Button clicks route to handle_action(), skip Input, go straight to Thinker - Fix Thinker model: gemini-2.5-flash-preview -> gemini-2.5-flash (old ID expired) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
7458b2ea35
commit
231f81bc52
@ -90,10 +90,7 @@ def register_routes(app):
|
||||
data = await ws.receive_text()
|
||||
msg = json.loads(data)
|
||||
if msg.get("type") == "action":
|
||||
action_text = f"[user clicked: {msg.get('action', 'unknown')}]"
|
||||
if msg.get("data"):
|
||||
action_text += f" data: {json.dumps(msg['data'])}"
|
||||
await runtime.handle_message(action_text)
|
||||
await runtime.handle_action(msg.get("action", "unknown"), msg.get("data"))
|
||||
elif msg.get("type") == "cancel_process":
|
||||
runtime.process_manager.cancel(msg.get("pid", 0))
|
||||
else:
|
||||
|
||||
@ -14,7 +14,7 @@ log = logging.getLogger("runtime")
|
||||
|
||||
class ThinkerNode(Node):
|
||||
name = "thinker"
|
||||
model = "google/gemini-2.5-flash-preview"
|
||||
model = "google/gemini-2.5-flash"
|
||||
max_context_tokens = 4000
|
||||
|
||||
SYSTEM = """You are the Thinker node — the brain of this cognitive runtime.
|
||||
|
||||
@ -55,6 +55,43 @@ class Runtime:
|
||||
log.error(f"trace write error: {e}")
|
||||
self._broadcast(trace_entry)
|
||||
|
||||
async def _stream_text(self, text: str):
|
||||
"""Stream pre-formed text to the client as deltas, simulating LLM output."""
|
||||
# Send in chunks to feel natural
|
||||
chunk_size = 12
|
||||
for i in range(0, len(text), chunk_size):
|
||||
chunk = text[i:i + chunk_size]
|
||||
await self.ws.send_text(json.dumps({"type": "delta", "content": chunk}))
|
||||
await self.ws.send_text(json.dumps({"type": "done"}))
|
||||
|
||||
async def handle_action(self, action: str, data: dict = None):
    """React to a structured UI action (button click etc.).

    The event is logged into history as a machine-readable line and fed
    straight to the Thinker — the Input node is bypassed because a click
    is not speech that needs perceiving.
    """
    # Build a structured event line the Thinker can parse.
    event_line = f"ACTION: {action}"
    if data:
        event_line += f" | data: {json.dumps(data)}"

    # Record as a system-level event rather than user speech
    # (role "user" keeps the turn order intact for the model).
    self.history.append({"role": "user", "content": event_line})
    self.sensor.note_user_activity()

    memory_block = self.memorizer.get_context_block(
        sensor_lines=self.sensor.get_context_lines()
    )

    # Go straight to the Thinker with a synthetic command.
    command = Command(
        instruction=f"User clicked UI button: {action}",
        source_text=event_line,
    )
    thought = await self.thinker.process(command, self.history, memory_context=memory_block)

    # Controls are sent before the response text so the UI renders them inline.
    if thought.controls:
        await self.ws.send_text(json.dumps({"type": "controls", "controls": thought.controls}))

    await self._stream_text(thought.response)
    self.history.append({"role": "assistant", "content": thought.response})

    await self.memorizer.update(self.history)

    # Keep the rolling history bounded.
    if len(self.history) > self.MAX_HISTORY:
        self.history = self.history[-self.MAX_HISTORY:]
|
||||
|
||||
async def handle_message(self, text: str):
|
||||
envelope = Envelope(
|
||||
text=text,
|
||||
@ -75,23 +112,24 @@ class Runtime:
|
||||
|
||||
thought = await self.thinker.process(command, self.history, memory_context=mem_ctx)
|
||||
|
||||
# Send controls inline (before response text)
|
||||
if thought.controls:
|
||||
await self.ws.send_text(json.dumps({"type": "controls", "controls": thought.controls}))
|
||||
|
||||
if thought.tool_used:
|
||||
command = Command(
|
||||
instruction=f"Thinker used {thought.tool_used} and says: {thought.response}",
|
||||
source_text=command.source_text
|
||||
)
|
||||
# Thinker already formulated response from tool output — stream directly
|
||||
await self._stream_text(thought.response)
|
||||
response = thought.response
|
||||
else:
|
||||
# Pure conversation — Output node adds personality and streams
|
||||
command = Command(
|
||||
instruction=f"Thinker says: {thought.response}",
|
||||
source_text=command.source_text
|
||||
)
|
||||
response = await self.output_node.process(command, self.history, self.ws, memory_context=mem_ctx)
|
||||
|
||||
response = await self.output_node.process(command, self.history, self.ws, memory_context=mem_ctx)
|
||||
self.history.append({"role": "assistant", "content": response})
|
||||
|
||||
if thought.controls:
|
||||
await self.ws.send_text(json.dumps({"type": "controls", "controls": thought.controls}))
|
||||
|
||||
await self.memorizer.update(self.history)
|
||||
|
||||
if len(self.history) > self.MAX_HISTORY:
|
||||
|
||||
@ -170,11 +170,13 @@ function handleHud(data) {
|
||||
const detail = JSON.stringify(data.state, null, 2);
|
||||
addTrace(node, 'state', pairs, 'state', detail);
|
||||
|
||||
} else if (event === 'tool_call') {
|
||||
addTrace(node, 'tool: ' + data.tool, truncate(data.code || '', 80), 'instruction', data.code);
|
||||
} else if (event === 'process_start') {
|
||||
addTrace(node, 'run ' + (data.tool || 'python'), truncate(data.code || '', 80), 'instruction', data.code);
|
||||
showProcessCard(data.pid, data.tool || 'python', data.code || '');
|
||||
|
||||
} else if (event === 'tool_result') {
|
||||
addTrace(node, 'result', truncate(data.output || '', 80), '', data.output);
|
||||
} else if (event === 'process_done') {
|
||||
addTrace(node, (data.exit_code === 0 ? 'done' : 'failed'), truncate(data.output || '', 80), data.exit_code === 0 ? '' : 'error', data.output);
|
||||
updateProcessCard(data.pid, data.exit_code === 0 ? 'done' : 'failed', data.output || '', data.elapsed);
|
||||
|
||||
} else if (event === 'error') {
|
||||
addTrace(node, 'error', data.detail || '', 'error');
|
||||
@ -310,6 +312,32 @@ function cancelProcess(pid) {
|
||||
}
|
||||
}
|
||||
|
||||
function showProcessCard(pid, tool, code) {
    // Render a live "running" card in the chat stream for a spawned process.
    const card = document.createElement('div');
    card.className = 'process-card running';
    card.id = 'proc-' + pid;
    card.innerHTML = [
        '<span class="pc-tool">' + esc(tool) + '</span>',
        '<span class="pc-status">running</span>',
        '<button class="pc-stop" onclick="cancelProcess(' + pid + ')">Stop</button>',
        '<pre class="pc-code">' + esc(truncate(code, 200)) + '</pre>',
        '<pre class="pc-output"></pre>'
    ].join('');
    msgs.appendChild(card);
    scroll(msgs);
}
|
||||
|
||||
function updateProcessCard(pid, status, output, elapsed) {
    // Flip an existing process card into its terminal state ('done'/'failed').
    const card = document.getElementById('proc-' + pid);
    if (!card) return;  // card may have been cleared from the chat

    card.className = 'process-card ' + status;

    const statusEl = card.querySelector('.pc-status');
    if (statusEl) {
        const suffix = elapsed ? ' (' + elapsed + 's)' : '';
        statusEl.textContent = status + suffix;
    }

    // The process is finished — the Stop button no longer applies.
    const stopBtn = card.querySelector('.pc-stop');
    if (stopBtn) stopBtn.remove();

    const outEl = card.querySelector('.pc-output');
    if (outEl && output) outEl.textContent = output;
}
|
||||
|
||||
function updateMeter(node, tokens, maxTokens, fillPct) {
|
||||
const meter = document.getElementById('meter-' + node);
|
||||
if (!meter) return;
|
||||
|
||||
@ -55,6 +55,8 @@ button:hover { background: #1d4ed8; }
|
||||
.trace-node.memorizer { color: #c084fc; }
|
||||
.trace-node.thinker { color: #fb923c; }
|
||||
.trace-node.runtime { color: #60a5fa; }
|
||||
.trace-node.process { color: #f97316; }
|
||||
.trace-node.sensor { color: #60a5fa; }
|
||||
|
||||
.trace-event { color: #888; flex-shrink: 0; min-width: 6rem; }
|
||||
|
||||
@ -79,6 +81,7 @@ button:hover { background: #1d4ed8; }
|
||||
.pc-tool { font-weight: 700; color: #fb923c; margin-right: 0.5rem; }
|
||||
.pc-status { color: #888; margin-right: 0.5rem; }
|
||||
.pc-stop { padding: 0.15rem 0.4rem; background: #ef4444; color: white; border: none; border-radius: 0.2rem; cursor: pointer; font-size: 0.7rem; }
|
||||
.pc-code { margin-top: 0.3rem; color: #666; white-space: pre-wrap; max-height: 4rem; overflow-y: auto; font-size: 0.7rem; }
|
||||
.pc-output { margin-top: 0.3rem; color: #888; white-space: pre-wrap; max-height: 8rem; overflow-y: auto; }
|
||||
|
||||
/* Expandable trace detail */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user