agent-runtime/agent/nodes/eras_expert.py

"""Eras Expert: heating/energy customer database specialist."""

import asyncio
import logging

from .expert_base import ExpertNode
from ..db import run_db_query

log = logging.getLogger("runtime")


class ErasExpertNode(ExpertNode):
    name = "eras_expert"
    default_database = "eras2_production"

    DOMAIN_SYSTEM = """You are the Eras expert — specialist for heating and energy customer data.
You work with the eras2_production database containing customer, device, and billing data.
All table and column names are German (lowercase). Common queries involve customer lookups,
device counts, consumption analysis, and billing reports."""

    SCHEMA = """Known tables (eras2_production):
- kunden — customers
- objekte — properties/objects linked to customers
- nutzeinheit — usage units within objects
- geraete — devices/meters
- geraeteverbraeuche — device consumption readings
- abrechnungen — billing records

CRITICAL: You do NOT know the exact column names. They are German and unpredictable.
Your FIRST tool_sequence step for ANY SELECT query MUST be DESCRIBE on the target table.
Then use the actual column names from the DESCRIBE result in your SELECT.

Example tool_sequence for "show me 5 customers":
[
  {{"tool": "query_db", "args": {{"query": "DESCRIBE kunden", "database": "eras2_production"}}}},
  {{"tool": "query_db", "args": {{"query": "SELECT * FROM kunden LIMIT 5", "database": "eras2_production"}}}}
]"""

    def __init__(self, send_hud, process_manager=None):
        super().__init__(send_hud, process_manager)
        self._schema_cache: dict[str, str] = {}  # table_name -> DESCRIBE result

    async def execute(self, job: str, language: str = "de"):
        """Execute with schema auto-discovery. Caches DESCRIBE results."""
        # Inject cached schema into the job context
        if self._schema_cache:
            schema_ctx = "Known column names from previous DESCRIBE:\n"
            for table, desc in self._schema_cache.items():
                # Just first 5 lines to keep it compact
                lines = desc.strip().split("\n")[:6]
                schema_ctx += f"\n{table}:\n" + "\n".join(lines) + "\n"
            job = job + "\n\n" + schema_ctx

        result = await super().execute(job, language)

        # Cache any DESCRIBE results from this execution
        # Parse from tool_output if it looks like a DESCRIBE result
        if result.tool_output and "Field\t" in result.tool_output:
            # Try to identify which table was described
            for table in ["kunden", "objekte", "nutzeinheit", "geraete",
                          "geraeteverbraeuche", "abrechnungen"]:
                if table in job.lower() or table in result.tool_output.lower():
                    self._schema_cache[table] = result.tool_output
                    log.info(f"[eras] cached schema for {table}")
                    break

        return result