Architecture: - director_v2: always-on brain, produces DirectorPlan with tool_sequence - thinker_v2: pure executor, runs tools from DirectorPlan - interpreter_v1: factual result summarizer, no hallucination - v2_director_drives graph: Input -> Director -> Thinker -> Output Infrastructure: - Split into 3 pods: cog-frontend (nginx), cog-runtime (FastAPI), cog-mcp (SSE proxy) - MCP survives runtime restarts (separate pod, proxies via HTTP) - Async send pipeline: /api/send/check -> /api/send -> /api/result with progress - Zero-downtime rolling updates (maxUnavailable: 0) - Dynamic graph visualization (fetched from API, not hardcoded) Tests: 22 new mocked unit tests (director_v2: 7, thinker_v2: 8, interpreter_v1: 7) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
90 lines · 3.2 KiB · Python
"""Interpreter Node v1: factual result summarizer — no hallucination."""
|
|
|
|
import json
|
|
import logging
|
|
|
|
from .base import Node
|
|
from ..llm import llm_call
|
|
from ..types import InterpretedResult
|
|
|
|
# Module-level logger under the shared "runtime" name — handler/level
# configuration is assumed to happen elsewhere (not visible in this file).
log = logging.getLogger("runtime")
|
|
|
|
|
|
class InterpreterNode(Node):
    """Interpreter v1: turns raw tool output into a factual summary.

    Given the raw output of a tool run and the user's original question,
    asks a small LLM for a strictly-factual JSON summary and parses the
    reply into an ``InterpretedResult``.  If the reply is not valid JSON,
    falls back to a mechanical low-confidence summary of the raw output,
    so this node never raises on a malformed LLM response.
    """

    name = "interpreter"
    # A cheap/fast model is sufficient for pure factual summarization.
    model = "google/gemini-2.0-flash-001"
    max_context_tokens = 2000

    # NOTE(review): the original used doubled braces ({{ }}), a str.format
    # template artifact — but SYSTEM is sent verbatim in interpret(), never
    # formatted, so the model saw literal "{{". Fixed to plain JSON braces.
    SYSTEM = """You are the Interpreter — a factual summarizer in a cognitive runtime.
You receive raw tool output (database results, computation output) and the user's original question.
Your job: produce a concise, FACTUAL summary.

CRITICAL RULES:
- ONLY state facts present in the tool output. NEVER add information not in the data.
- If the data shows 5 rows, say 5 — not "approximately 5" or "at least 5".
- For tabular data: highlight the key numbers, don't repeat every row.
- For empty results: say "no results found", don't speculate why.
- For errors: state the error clearly.

Output JSON:
{
"summary": "concise factual summary (1-3 sentences)",
"row_count": 0,
"key_facts": ["fact1", "fact2"],
"confidence": "high | medium | low"
}

Set confidence to "low" if the data is ambiguous or incomplete.
Output ONLY valid JSON."""

    async def interpret(self, tool_name: str, tool_output: str,
                        user_question: str) -> InterpretedResult:
        """Interpret tool output into a factual summary.

        Args:
            tool_name: Name of the tool that produced ``tool_output``.
            tool_output: Raw tool output; only the first 1500 chars are
                sent to the LLM to bound prompt size.
            user_question: The user's original question, for context.

        Returns:
            ``InterpretedResult`` parsed from the LLM's JSON reply, or a
            low-confidence mechanical fallback when parsing fails.
        """
        await self.hud("thinking", detail=f"interpreting {tool_name} result")

        messages = [
            {"role": "system", "content": self.SYSTEM},
            {"role": "user", "content": (
                f"Tool: {tool_name}\n"
                f"User asked: {user_question}\n\n"
                f"Raw output:\n{tool_output[:1500]}"
            )},
        ]

        raw = await llm_call(self.model, messages)
        # Lazy %-args: the slice/format work is skipped if INFO is disabled.
        log.info("[interpreter] raw: %s", raw[:200])

        result = self._parse_result(raw, tool_output)
        await self.hud("interpreted", summary=result.summary[:200],
                       row_count=result.row_count, confidence=result.confidence)
        return result

    def _parse_result(self, raw: str, tool_output: str) -> InterpretedResult:
        """Parse LLM output into an ``InterpretedResult``, with fallback.

        Args:
            raw: The LLM's reply, possibly wrapped in a Markdown fence.
            tool_output: Original tool output, used for the fallback summary.

        Returns:
            Parsed result, or a low-confidence fallback built mechanically
            from ``tool_output`` if ``raw`` is not usable JSON.
        """
        text = raw.strip()
        # Strip a Markdown code fence (``` or ```json) if the model added one.
        if text.startswith("```"):
            text = text.split("\n", 1)[1] if "\n" in text else text[3:]
        if text.endswith("```"):
            text = text[:-3]
        text = text.strip()

        try:
            data = json.loads(text)
            return InterpretedResult(
                summary=data.get("summary", ""),
                row_count=data.get("row_count", 0),
                key_facts=data.get("key_facts", []),
                confidence=data.get("confidence", "medium"),
            )
        # Deliberate broad catch at this best-effort boundary: covers bad
        # JSON, non-dict payloads (AttributeError on .get), and model
        # validation errors. The original `(JSONDecodeError, Exception)`
        # tuple was redundant — Exception already subsumes JSONDecodeError.
        except Exception as e:
            log.error("[interpreter] parse failed: %s", e)
            # Fallback: summarize the raw tool output mechanically.
            lines = tool_output.strip().split("\n")
            # Short output is quoted directly; longer output is summarized
            # as "<header> (<N> rows)", treating the first line as a header.
            summary = tool_output[:200] if len(lines) <= 3 else f"{lines[0]} ({len(lines)-1} rows)"
            return InterpretedResult(
                summary=summary,
                row_count=max(0, len(lines) - 1),
                key_facts=[],
                confidence="low",
            )