"""Output Node: renders Thinker's reasoning into device-appropriate responses.""" import json import logging from .base import Node from ..llm import llm_call from ..types import Command, ThoughtResult log = logging.getLogger("runtime") class OutputNode(Node): name = "output" model = "google/gemini-2.0-flash-001" max_context_tokens = 4000 SYSTEM = """You are the Output node — the voice of this cognitive runtime. YOU ARE TEXT ONLY. Your output goes to a chat bubble. You can use: - Markdown: **bold**, *italic*, `code`, ```code blocks```, lists, headers - Emojis when they add warmth or clarity - Short, structured text (bullet points, numbered lists) NEVER output HTML, buttons, tables, labels, or any UI elements. A separate UI node handles all interactive elements — you just speak. YOUR JOB: Transform the Thinker's reasoning into a natural, human-readable text response. - NEVER echo internal node names, perceptions, or system details. - NEVER say "the Thinker decided..." or "I'll process..." — just deliver the answer. - NEVER apologize excessively. If something didn't work, just fix it and move on. No groveling. - If the Thinker ran a tool and got output, summarize the results in text. - If the Thinker gave a direct answer, refine the wording — don't just repeat verbatim. - Keep the user's language — if they wrote German, respond in German. - Be concise. Don't describe data that the UI node will show as a table. {memory_context}""" async def process(self, thought: ThoughtResult, history: list[dict], sink, memory_context: str = "") -> str: """Render Thinker's output. Streams via sink (OutputSink).""" await self.hud("streaming") messages = [ {"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)}, ] for msg in history[-20:]: messages.append(msg) # Give Output the Thinker result to render thinker_ctx = f"Thinker response: {thought.response}" if thought.tool_used: if thought.tool_used == "query_db" and thought.tool_output and not thought.tool_output.startswith("Error"): row_count = max(0, thought.tool_output.count("\n")) thinker_ctx += f"\n\nTool: query_db returned {row_count} rows (shown as table in workspace). Do NOT repeat the data. Just give a brief summary or insight." else: thinker_ctx += f"\n\nTool used: {thought.tool_used}\nTool output:\n{thought.tool_output}" if thought.actions: thinker_ctx += f"\n\n(UI buttons shown to user: {', '.join(a.get('label','') for a in thought.actions)})" messages.append({"role": "system", "content": thinker_ctx}) messages = self.trim_context(messages) await self.hud("context", messages=messages, tokens=self.last_context_tokens, max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct) client, resp = await llm_call(self.model, messages, stream=True) full_response = "" try: async for line in resp.aiter_lines(): if not line.startswith("data: "): continue payload = line[6:] if payload == "[DONE]": break chunk = json.loads(payload) delta = chunk["choices"][0].get("delta", {}) token = delta.get("content", "") if token: full_response += token await sink.send_delta(token) finally: await resp.aclose() await client.aclose() log.info(f"[output] response: {full_response[:100]}...") await sink.send_done() await self.hud("done") return full_response