Nico 7458b2ea35 v0.8.0: refactor agent.py into modular package
Split 1161-line monolith into agent/ package:
auth, llm, types, process, runtime, api, and
nodes/ (base, sensor, input, output, thinker, memorizer).
No logic changes — pure structural split.
uvicorn agent:app entrypoint unchanged.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 01:36:41 +01:00

64 lines
2.3 KiB
Python

"""Output Node: streams natural response to the user."""
import json
import logging
from fastapi import WebSocket
from .base import Node
from ..llm import llm_call
from ..types import Command
log = logging.getLogger("runtime")
class OutputNode(Node):
    """Output node: streams the final natural-language reply to the user.

    Consumes the Input node's perception (``command.instruction``), builds a
    chat context, and streams LLM tokens to the client over the WebSocket as
    ``{"type": "delta"}`` frames, finishing with ``{"type": "done"}``.
    """

    name = "output"
    model = "google/gemini-2.0-flash-001"
    max_context_tokens = 4000
    SYSTEM = """You are the Output node — the voice of this cognitive runtime.
The Input node sends you its perception of what the user said. This is internal context for you — never repeat or echo it.
You respond to the USER, not to the Input node. Use the perception to understand intent, then act on it.
Be natural. Be concise. If the user asks you to do something, do it — don't describe what you're about to do.
{memory_context}"""

    async def process(self, command: Command, history: list[dict], ws: WebSocket, memory_context: str = "") -> str:
        """Stream an LLM response to the user and return the full text.

        Args:
            command: Carries the Input node's perception in ``command.instruction``.
            history: Prior chat messages; only the last 20 are included.
            ws: Open WebSocket receiving ``delta`` token frames and a final ``done``.
            memory_context: Optional memory text interpolated into the system prompt.

        Returns:
            The complete concatenated response string (may be empty).
        """
        await self.hud("streaming")
        messages = [
            {"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
        ]
        messages.extend(history[-20:])
        messages.append({"role": "system", "content": f"Input perception: {command.instruction}"})
        messages = self.trim_context(messages)
        await self.hud("context", messages=messages, tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct)
        client, resp = await llm_call(self.model, messages, stream=True)
        full_response = ""
        try:
            async for line in resp.aiter_lines():
                # SSE comment/keep-alive lines (e.g. ": OPENROUTER PROCESSING")
                # and blank lines don't carry data — skip them.
                if not line.startswith("data: "):
                    continue
                payload = line[6:]
                if payload == "[DONE]":
                    break
                try:
                    chunk = json.loads(payload)
                except json.JSONDecodeError:
                    # A truncated/garbled event must not kill the whole stream.
                    log.warning("[output] dropping malformed SSE payload: %.80s", payload)
                    continue
                choices = chunk.get("choices")
                if not choices:
                    # Some providers emit metadata/usage-only chunks with an
                    # empty "choices" array; indexing [0] there would crash.
                    continue
                delta = choices[0].get("delta", {})
                # "content" may be an explicit JSON null — coerce to "".
                token = delta.get("content") or ""
                if token:
                    full_response += token
                    await ws.send_text(json.dumps({"type": "delta", "content": token}))
        finally:
            # Always release the HTTP response and client, even on cancellation.
            await resp.aclose()
            await client.aclose()
        log.info("[output] response: %.100s...", full_response)
        await ws.send_text(json.dumps({"type": "done"}))
        await self.hud("done")
        return full_response