# agent-runtime/agent/nodes/ui.py

"""UI Node: renders interactive elements to the awareness panel workspace."""

import json
import logging

from .base import Node
from ..llm import llm_call
from ..types import ThoughtResult

log = logging.getLogger("runtime")


class UINode(Node):
    """Node that asks an LLM which UI elements the workspace should show.

    The node keeps the most recently emitted list of controls in
    ``current_controls`` and feeds it back into the system prompt on the next
    call, so the model can see what is currently rendered.
    """

    name = "ui"
    model = "google/gemini-2.0-flash-001"
    max_context_tokens = 3000

    # Number of trailing history messages forwarded to the model.
    _HISTORY_WINDOW = 6

    # Prompt template. It is rendered with ``str.format``, which is why the
    # literal braces of the JSON examples are doubled; ``{current_controls}``
    # is the only substitution field.
    SYSTEM = """You are the UI node of a cognitive agent runtime.

You render interactive elements to a workspace panel in the browser. A separate Output node handles all text — you NEVER write prose, explanations, or messages.

YOUR OUTPUT: A JSON array of UI elements, or [] if nothing to show.

ELEMENT TYPES:

label — display a value:
  {{"type": "label", "id": "unique_id", "text": "Label Text", "value": "current value"}}

button — clickable action:
  {{"type": "button", "label": "Short Label", "action": "action_name", "payload": {{"key": "value"}}}}

table — structured data:
  {{"type": "table", "columns": ["col1", "col2"], "data": [{{"col1": "val", "col2": "val"}}]}}

RULES:
- Output ONLY a valid JSON array. No text, no markdown, no explanation.
- Labels: show key values the user asked about or that resulted from tool execution.
- Buttons: offer clear follow-up actions. Keep labels 2-4 words. Action is snake_case.
- Tables: when tool output contains structured/tabular data.
- Return [] when the response is purely conversational with no actionable data.
- Every element you emit REPLACES the entire workspace. Include all elements that should be visible.

CURRENT WORKSPACE:
{current_controls}"""

    def __init__(self, send_hud):
        """Initialise the node with an empty workspace.

        Args:
            send_hud: Forwarded unchanged to ``Node.__init__``.
                NOTE(review): presumably the callback behind ``self.hud`` —
                confirm against the base class.
        """
        super().__init__(send_hud)
        # Last non-empty list of controls emitted by ``process``.
        self.current_controls: list[dict] = []

    async def process(self, thought: ThoughtResult, history: list[dict],
                      memory_context: str = "") -> list[dict]:
        """Ask the model for the workspace controls and publish them.

        Args:
            thought: Result of the thinking step; ``response``, ``tool_used``
                and ``tool_output`` are read from it.
            history: Conversation messages; only the last
                ``_HISTORY_WINDOW`` entries are sent to the model.
            memory_context: Accepted for interface compatibility; not used
                by this node.

        Returns:
            The list of control dicts parsed from the model reply. The list
            is empty when the reply is not a JSON array (or contains no JSON
            objects); in that case ``current_controls`` is left unchanged.
        """
        await self.hud("thinking", detail="deciding UI controls")

        # ``trim_context`` and the context statistics come from the base
        # class; they are reported to the HUD before the model is called.
        messages = self.trim_context(self._build_messages(thought, history))
        await self.hud("context", messages=messages, tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct)

        raw = await llm_call(self.model, messages)
        log.info("[ui] raw: %s", raw[:200])

        controls = self._parse_controls(raw)

        if controls:
            # Only a non-empty result replaces the remembered workspace.
            self.current_controls = controls
            await self.hud("controls", controls=controls)
            log.info("[ui] emitting %d controls", len(controls))
        else:
            await self.hud("decided", instruction="no controls needed")

        return controls

    def _build_messages(self, thought: ThoughtResult, history: list[dict]) -> list[dict]:
        """Assemble the chat messages sent to the model for one call."""
        # Show the model what is currently rendered.
        if self.current_controls:
            ctrl_desc = json.dumps(self.current_controls, indent=2)
        else:
            ctrl_desc = "(empty)"

        messages = [
            {"role": "system", "content": self.SYSTEM.format(current_controls=ctrl_desc)},
        ]
        messages.extend(history[-self._HISTORY_WINDOW:])

        ctx = f"Thinker response: {thought.response}"
        if thought.tool_used:
            ctx += f"\n\nTool: {thought.tool_used}\nTool output:\n{thought.tool_output}"
        messages.append({"role": "system", "content": ctx})
        messages.append({"role": "user", "content": "What UI elements should the workspace show now? Return JSON array."})
        return messages

    @staticmethod
    def _strip_code_fence(raw: str) -> str:
        """Return ``raw`` without a surrounding Markdown code fence, if any."""
        text = raw.strip()
        if not text.startswith("```"):
            return text
        # Drop the whole opening fence line (it may carry a language tag such
        # as ```json); without a newline only the three backticks are removed.
        _, newline, remainder = text.partition("\n")
        text = remainder if newline else text[3:]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    @classmethod
    def _parse_controls(cls, raw: str) -> list[dict]:
        """Parse the model reply into a list of control dicts.

        Anything that is not a JSON array yields ``[]``. Array elements that
        are not JSON objects are dropped so that the result honours the
        ``list[dict]`` contract of ``process``.
        """
        text = cls._strip_code_fence(raw)
        try:
            parsed = json.loads(text)
        except (json.JSONDecodeError, RecursionError) as e:
            # Best effort: a malformed (or absurdly nested) reply means
            # "nothing to show" rather than a crash.
            log.error("[ui] parse error: %s, raw: %s", e, text[:200])
            return []

        if not isinstance(parsed, list):
            return []

        controls = [element for element in parsed if isinstance(element, dict)]
        dropped = len(parsed) - len(controls)
        if dropped:
            log.warning("[ui] dropped %d non-object elements", dropped)
        return controls