agent-runtime/agent/nodes/output_v1.py

"""Output Node: renders Thinker's reasoning into device-appropriate responses."""

import json
import logging

from .base import Node
from ..llm import llm_call
from ..types import Command, ThoughtResult

log = logging.getLogger("runtime")


class OutputNode(Node):
    name = "output"
    model = "google/gemini-2.0-flash-001"
    max_context_tokens = 4000

    SYSTEM = """You are the Output node — the voice of this cognitive runtime.

YOU ARE TEXT ONLY. Your output goes to a chat bubble. You can use:
- Markdown: **bold**, *italic*, `code`, ```code blocks```, lists, headers
- Emojis when they add warmth or clarity
- Short, structured text (bullet points, numbered lists)

NEVER output HTML, buttons, tables, labels, or any UI elements.
A separate UI node handles all interactive elements — you just speak.

YOUR JOB: Transform the Thinker's reasoning into a natural, human-readable text response.
- NEVER echo internal node names, perceptions, or system details.
- NEVER say "the Thinker decided..." or "I'll process..." — just deliver the answer.
- NEVER apologize excessively. If something didn't work, just fix it and move on. No groveling.
- If the Thinker ran a tool and got output, summarize the results in text.
- If the Thinker gave a direct answer, refine the wording — don't just repeat verbatim.
- Keep the user's language — if they wrote German, respond in German.
- Be concise. Don't describe data that the UI node will show as a table.

PHRASING by user_expectation (from memorizer):
- "delegated": progress-report style. State what was done and what's next. No questions unless blocked.
- "waiting_input": acknowledge the user's answer and continue the flow naturally.
- "observing": keep it brief. No unsolicited follow-up questions or suggestions.
- "conversational": natural, warm dialogue. Follow-ups are fine.

{memory_context}"""

    async def process(self, thought: ThoughtResult, history: list[dict],
                      sink, memory_context: str = "") -> str:
        """Render Thinker's output. Streams via sink (OutputSink)."""
        await self.hud("streaming")

        messages = [
            {"role": "system", "content": self.SYSTEM.replace("{memory_context}", memory_context)},
        ]
        for msg in history[-20:]:
            messages.append(msg)

        # Give Output the Thinker result to render
        thinker_ctx = f"Thinker response: {thought.response}"
        if thought.tool_used:
            if thought.tool_used == "query_db" and thought.tool_output and not thought.tool_output.startswith("Error"):
                row_count = max(0, thought.tool_output.count("\n"))
                thinker_ctx += f"\n\nTool: query_db returned {row_count} rows (shown as table in workspace). Do NOT repeat the data. Just give a brief summary or insight."
            else:
                thinker_ctx += f"\n\nTool used: {thought.tool_used}\nTool output:\n{thought.tool_output}"
        if thought.actions:
            thinker_ctx += f"\n\n(UI buttons shown to user: {', '.join(a.get('label','') for a in thought.actions)})"
        messages.append({"role": "system", "content": thinker_ctx})

        messages = self.trim_context(messages)

        await self.hud("context", messages=messages, tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct)

        client, resp = await llm_call(self.model, messages, stream=True)
        full_response = ""
        try:
            async for line in resp.aiter_lines():
                if not line.startswith("data: "):
                    continue
                payload = line[6:]
                if payload == "[DONE]":
                    break
                chunk = json.loads(payload)
                delta = chunk["choices"][0].get("delta", {})
                token = delta.get("content", "")
                if token:
                    full_response += token
                    await sink.send_delta(token)
        finally:
            await resp.aclose()
            await client.aclose()

        log.info(f"[output] response: {full_response[:100]}...")
        await sink.send_done()
        await self.hud("done")
        return full_response