# agent-runtime/agent/nodes/output.py

"""Output Node: renders Thinker's reasoning into device-appropriate responses."""

import json
import logging

from fastapi import WebSocket

from .base import Node
from ..llm import llm_call
from ..types import Command, ThoughtResult

log = logging.getLogger("runtime")


class OutputNode(Node):
    """Node that turns a Thinker result into a streamed, user-facing reply.

    The node builds a prompt from the system instructions, the recent chat
    history and the Thinker's result, streams the model's completion, and
    forwards every token to the client's WebSocket as it arrives.
    """

    # Identifier and model settings for this node.
    # NOTE(review): presumably consumed by the Node base class — confirm there.
    name = "output"
    model = "google/gemini-2.0-flash-001"
    max_context_tokens = 4000

    # Prompt template; ``{memory_context}`` is the only placeholder and is
    # filled in by ``process`` via ``str.format``.
    SYSTEM = """You are the Output node — the renderer of this cognitive runtime.

DEVICE: The user is on a web browser (Chrome, desktop). Your output renders in an HTML chat panel.
You can use markdown: **bold**, *italic*, `code`, ```code blocks```, lists, headers.
The chat panel renders markdown to HTML — use it for structure when helpful.

YOUR JOB: Transform the Thinker's reasoning into a polished, user-facing response.
- The Thinker reasons and may use tools. You receive its output and render it for the human.
- NEVER echo internal node names, perceptions, or system details.
- NEVER say "the Thinker decided..." or "I'll process..." — just deliver the answer.
- If the Thinker ran a tool and got output, weave the results into a natural response.
- If the Thinker gave a direct answer, refine and format it — don't just repeat it.
- Keep the user's language — if they wrote German, respond in German.
- Be concise but complete. Use formatting to make data scannable.

{memory_context}"""

    @staticmethod
    def _extract_token(payload: str) -> str:
        """Return the text token carried by one streamed JSON chunk.

        Chunks that are malformed, report an error, or carry no content
        (e.g. an empty ``choices`` list or a delta without ``content``)
        yield ``""`` instead of raising, so a single odd chunk does not
        abort a reply that is already partially streamed to the user.
        """
        try:
            chunk = json.loads(payload)
        except json.JSONDecodeError:
            log.warning("[output] skipping malformed stream chunk: %r", payload[:100])
            return ""
        if not isinstance(chunk, dict):
            return ""
        if "error" in chunk:
            # Surface provider-side errors in the log rather than dropping
            # them silently.
            log.error("[output] stream reported an error: %r", chunk["error"])
        choices = chunk.get("choices")
        if not choices:
            return ""
        delta = choices[0].get("delta") or {}
        return delta.get("content") or ""

    async def process(self, thought: ThoughtResult, history: list[dict],
                      ws: WebSocket, memory_context: str = "") -> str:
        """Stream the rendered answer to ``ws`` and return the full text.

        Args:
            thought: Thinker result; ``response``, ``tool_used``,
                ``tool_output`` and ``controls`` are read from it.
            history: Prior chat messages; only the last 20 are included.
            ws: WebSocket that receives ``{"type": "delta"}`` messages per
                token and a final ``{"type": "done"}`` message.
            memory_context: Text substituted into the system prompt.

        Returns:
            The concatenation of all streamed tokens.
        """
        await self.hud("streaming")

        messages = [
            {"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
            *history[-20:],
        ]

        # Give Output the full Thinker result to render
        thinker_ctx = f"Thinker response: {thought.response}"
        if thought.tool_used:
            thinker_ctx += f"\n\nTool used: {thought.tool_used}\nTool output:\n{thought.tool_output}"
        if thought.controls:
            thinker_ctx += f"\n\n(UI controls were also sent to the user: {len(thought.controls)} elements)"
        messages.append({"role": "system", "content": thinker_ctx})

        messages = self.trim_context(messages)

        await self.hud("context", messages=messages, tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct)

        client, resp = await llm_call(self.model, messages, stream=True)
        tokens: list[str] = []
        try:
            async for line in resp.aiter_lines():
                # Server-sent events: only "data" fields carry payloads; the
                # single space after the colon is optional.
                if not line.startswith("data:"):
                    continue
                payload = line[len("data:"):].strip()
                if not payload:
                    continue
                if payload == "[DONE]":
                    break
                token = self._extract_token(payload)
                if token:
                    tokens.append(token)
                    await ws.send_text(json.dumps({"type": "delta", "content": token}))
        finally:
            # Close the client even if closing the response fails.
            try:
                await resp.aclose()
            finally:
                await client.aclose()

        full_response = "".join(tokens)
        log.info("[output] response: %s...", full_response[:100])
        await ws.send_text(json.dumps({"type": "done"}))
        await self.hud("done")
        return full_response