"""Output Node: streams natural response to the user."""

import json
import logging

from fastapi import WebSocket

from .base import Node
from ..llm import llm_call
from ..types import Command

log = logging.getLogger("runtime")


class OutputNode(Node):
    """Node that turns the Input node's perception into a streamed user reply.

    The reply is requested from the LLM in streaming mode; every content
    token is forwarded to the client websocket as soon as it arrives.
    """

    name = "output"
    model = "google/gemini-2.0-flash-001"
    max_context_tokens = 4000

    # Adjacent literals are concatenated at compile time; the resulting prompt
    # is one single-space-separated string ending in the {memory_context} slot.
    SYSTEM = (
        "You are the Output node — the voice of this cognitive runtime. "
        "The Input node sends you its perception of what the user said. "
        "This is internal context for you — never repeat or echo it. "
        "You respond to the USER, not to the Input node. "
        "Use the perception to understand intent, then act on it. "
        "Be natural. Be concise. "
        "If the user asks you to do something, do it — "
        "don't describe what you're about to do. "
        "{memory_context}"
    )

    @staticmethod
    def _token_from_payload(payload: str) -> str:
        """Return the content token carried by one streamed JSON chunk.

        Returns "" when the chunk has no choices, no delta, or no content
        (for example a usage-only or role-only chunk), so the caller can
        simply skip it instead of failing mid-stream.

        Raises:
            json.JSONDecodeError: if *payload* is not valid JSON.
        """
        chunk = json.loads(payload)
        choices = chunk.get("choices") or []
        if not choices:
            return ""
        # "delta" may be absent or explicitly null; treat both as empty.
        delta = choices[0].get("delta") or {}
        return delta.get("content") or ""

    async def process(self, command: Command, history: list[dict],
                      ws: WebSocket, memory_context: str = "") -> str:
        """Stream the model's reply to *ws* and return the complete text.

        Args:
            command: Only ``command.instruction`` is read; it is passed to the
                model as a trailing system message ("Input perception: ...").
            history: Prior chat messages; only the last 20 are included.
            ws: Websocket that receives one ``{"type": "delta", "content": ...}``
                JSON text frame per token, then a final ``{"type": "done"}``.
            memory_context: Text substituted into the ``SYSTEM`` prompt.

        Returns:
            The concatenation of every token that was streamed.

        Raises:
            Errors from the LLM call, JSON decoding, or the websocket
            propagate to the caller; in that case no "done" frame is sent.
        """
        await self.hud("streaming")

        messages = [
            {"role": "system",
             "content": self.SYSTEM.format(memory_context=memory_context)},
            *history[-20:],
            {"role": "system",
             "content": f"Input perception: {command.instruction}"},
        ]
        # trim_context is inherited from Node (not shown in this file).
        messages = self.trim_context(messages)

        await self.hud("context", messages=messages,
                       tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens,
                       fill_pct=self.context_fill_pct)

        client, resp = await llm_call(self.model, messages, stream=True)

        prefix = "data: "
        tokens: list[str] = []
        try:
            async for line in resp.aiter_lines():
                # Only "data: " lines carry payloads; skip everything else.
                if not line.startswith(prefix):
                    continue
                payload = line[len(prefix):]
                if payload == "[DONE]":
                    break
                token = self._token_from_payload(payload)
                if token:
                    tokens.append(token)
                    await ws.send_text(
                        json.dumps({"type": "delta", "content": token}))
        finally:
            # Close the client even if closing the response itself fails.
            try:
                await resp.aclose()
            finally:
                await client.aclose()

        full_response = "".join(tokens)
        log.info("[output] response: %s...", full_response[:100])
        await ws.send_text(json.dumps({"type": "done"}))
        await self.hud("done")
        return full_response