Architecture: - director_v2: always-on brain, produces DirectorPlan with tool_sequence - thinker_v2: pure executor, runs tools from DirectorPlan - interpreter_v1: factual result summarizer, no hallucination - v2_director_drives graph: Input -> Director -> Thinker -> Output Infrastructure: - Split into 3 pods: cog-frontend (nginx), cog-runtime (FastAPI), cog-mcp (SSE proxy) - MCP survives runtime restarts (separate pod, proxies via HTTP) - Async send pipeline: /api/send/check -> /api/send -> /api/result with progress - Zero-downtime rolling updates (maxUnavailable: 0) - Dynamic graph visualization (fetched from API, not hardcoded) Tests: 22 new mocked unit tests (director_v2: 7, thinker_v2: 8, interpreter_v1: 7) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
189 lines
7.8 KiB
Python
"""Unit tests for DirectorNode v2 — always-on brain, drives thinker."""
|
|
|
|
import json
|
|
from unittest.mock import AsyncMock, patch
|
|
|
|
from harness import HudCapture, make_command, make_history, NodeTestRunner
|
|
|
|
|
|
# ---- helpers ----
|
|
|
|
def mock_llm_json(obj):
|
|
"""Return an AsyncMock that returns JSON string (no tools)."""
|
|
async def _call(model, messages, **kw):
|
|
if kw.get("tools"):
|
|
return json.dumps(obj), []
|
|
return json.dumps(obj)
|
|
return _call
|
|
|
|
|
|
def make_director():
|
|
from agent.nodes.director_v2 import DirectorV2Node
|
|
hud = HudCapture()
|
|
node = DirectorV2Node(send_hud=hud)
|
|
return node, hud
|
|
|
|
|
|
# ---- tests ----
|
|
|
|
async def test_returns_director_plan():
|
|
"""Director v2 should return a DirectorPlan, not just a style directive."""
|
|
from agent.types import DirectorPlan
|
|
node, hud = make_director()
|
|
cmd = make_command(intent="request", topic="database query",
|
|
text="how many customers are there?", complexity="complex")
|
|
mock_response = {
|
|
"goal": "count customers",
|
|
"steps": ["query_db('SELECT COUNT(*) FROM kunden')"],
|
|
"present_as": "summary",
|
|
"tool_sequence": [{"tool": "query_db", "args": {"query": "SELECT COUNT(*) FROM kunden", "database": "eras2_production"}}],
|
|
"reasoning": "simple count query",
|
|
"response_hint": "",
|
|
}
|
|
with patch("agent.nodes.director_v2.llm_call", side_effect=mock_llm_json(mock_response)):
|
|
plan = await node.decide(cmd, [], memory_context="")
|
|
assert isinstance(plan, DirectorPlan), f"got {type(plan)}"
|
|
assert plan.goal == "count customers"
|
|
assert len(plan.tool_sequence) == 1
|
|
assert plan.tool_sequence[0]["tool"] == "query_db"
|
|
|
|
|
|
async def test_direct_response_for_simple():
|
|
"""Simple questions should get response_hint, no tool_sequence."""
|
|
node, hud = make_director()
|
|
cmd = make_command(intent="question", topic="greeting", text="hey how are you?",
|
|
complexity="trivial")
|
|
mock_response = {
|
|
"goal": "respond to greeting",
|
|
"steps": [],
|
|
"present_as": "summary",
|
|
"tool_sequence": [],
|
|
"reasoning": "social greeting, no tools needed",
|
|
"response_hint": "Respond warmly to the greeting",
|
|
}
|
|
with patch("agent.nodes.director_v2.llm_call", side_effect=mock_llm_json(mock_response)):
|
|
plan = await node.decide(cmd, [], memory_context="")
|
|
assert plan.is_direct_response, "should be direct response"
|
|
assert not plan.has_tools, "should have no tools"
|
|
assert plan.response_hint
|
|
|
|
|
|
async def test_multi_step_plan():
|
|
"""Complex requests should produce multi-step tool_sequence."""
|
|
node, hud = make_director()
|
|
cmd = make_command(intent="request", topic="customer devices",
|
|
text="show customers with most devices", complexity="complex")
|
|
mock_response = {
|
|
"goal": "find customers with most devices",
|
|
"steps": [
|
|
"Step 1: query_db to count devices per customer",
|
|
"Step 2: present top 10 as table",
|
|
],
|
|
"present_as": "table",
|
|
"tool_sequence": [
|
|
{"tool": "query_db", "args": {"query": "SELECT k.name, COUNT(g.id) as cnt FROM kunden k JOIN geraete g ON g.kunden_id = k.id GROUP BY k.id ORDER BY cnt DESC LIMIT 10", "database": "eras2_production"}},
|
|
{"tool": "emit_display", "args": {"items": [{"type": "text", "label": "Top customers by device count"}]}},
|
|
],
|
|
"reasoning": "join kunden and geraete, aggregate, sort",
|
|
"response_hint": "",
|
|
}
|
|
with patch("agent.nodes.director_v2.llm_call", side_effect=mock_llm_json(mock_response)):
|
|
plan = await node.decide(cmd, [], memory_context="")
|
|
assert plan.has_tools
|
|
assert len(plan.tool_sequence) == 2
|
|
assert plan.present_as == "table"
|
|
|
|
|
|
async def test_emits_hud_events():
|
|
"""Director v2 should emit thinking + decided HUD events."""
|
|
node, hud = make_director()
|
|
cmd = make_command(intent="question", text="hello")
|
|
mock_response = {
|
|
"goal": "greet", "steps": [], "present_as": "summary",
|
|
"tool_sequence": [], "reasoning": "simple", "response_hint": "say hi",
|
|
}
|
|
with patch("agent.nodes.director_v2.llm_call", side_effect=mock_llm_json(mock_response)):
|
|
await node.decide(cmd, [], memory_context="")
|
|
assert hud.has("thinking"), f"missing thinking: {[e['event'] for e in hud.events]}"
|
|
assert hud.has("decided"), f"missing decided: {[e['event'] for e in hud.events]}"
|
|
|
|
|
|
async def test_still_updates_style_directive():
|
|
"""Director v2 should still maintain mode/style for Output node."""
|
|
node, hud = make_director()
|
|
cmd = make_command(intent="request", tone="frustrated",
|
|
text="nothing works", complexity="simple")
|
|
mock_response = {
|
|
"goal": "help debug",
|
|
"steps": [],
|
|
"present_as": "summary",
|
|
"tool_sequence": [],
|
|
"reasoning": "user frustrated, be patient",
|
|
"response_hint": "Acknowledge frustration, offer to help step by step",
|
|
"mode": "debugging",
|
|
"style": "patient and structured",
|
|
}
|
|
with patch("agent.nodes.director_v2.llm_call", side_effect=mock_llm_json(mock_response)):
|
|
plan = await node.decide(cmd, [], memory_context="")
|
|
assert node.directive["mode"] == "debugging"
|
|
assert "patient" in node.directive["style"].lower()
|
|
|
|
|
|
async def test_history_included_in_context():
|
|
"""Director should use conversation history for context."""
|
|
node, hud = make_director()
|
|
cmd = make_command(intent="request", text="now show the details")
|
|
history = make_history([
|
|
("user", "show me customers"),
|
|
("assistant", "Here are the top customers..."),
|
|
])
|
|
mock_response = {
|
|
"goal": "show details", "steps": [], "present_as": "summary",
|
|
"tool_sequence": [{"tool": "query_db", "args": {"query": "SELECT * FROM kunden LIMIT 5", "database": "eras2_production"}}],
|
|
"reasoning": "follow-up from customer list", "response_hint": "",
|
|
}
|
|
captured_messages = []
|
|
|
|
async def capture_llm(model, messages, **kw):
|
|
captured_messages.extend(messages)
|
|
if kw.get("tools"):
|
|
return json.dumps(mock_response), []
|
|
return json.dumps(mock_response)
|
|
|
|
with patch("agent.nodes.director_v2.llm_call", side_effect=capture_llm):
|
|
await node.decide(cmd, history, memory_context="")
|
|
# History messages should appear in the LLM context
|
|
contents = [m["content"] for m in captured_messages]
|
|
assert any("show me customers" in c for c in contents), "history not in context"
|
|
|
|
|
|
async def test_bad_json_returns_fallback():
|
|
"""If LLM returns garbage, Director should return a safe fallback plan."""
|
|
node, hud = make_director()
|
|
cmd = make_command(intent="question", text="hello")
|
|
|
|
async def bad_llm(model, messages, **kw):
|
|
if kw.get("tools"):
|
|
return "not json at all {{{", []
|
|
return "not json at all {{{"
|
|
|
|
with patch("agent.nodes.director_v2.llm_call", side_effect=bad_llm):
|
|
plan = await node.decide(cmd, [], memory_context="")
|
|
# Should not crash — should return a fallback
|
|
assert plan.is_direct_response, "fallback should be direct response"
|
|
assert plan.response_hint, "fallback should have response_hint"
|
|
|
|
|
|
if __name__ == "__main__":
|
|
runner = NodeTestRunner()
|
|
print("\n=== DirectorNode v2 ===")
|
|
runner.test("returns DirectorPlan", test_returns_director_plan())
|
|
runner.test("direct response for simple", test_direct_response_for_simple())
|
|
runner.test("multi-step plan", test_multi_step_plan())
|
|
runner.test("emits HUD events", test_emits_hud_events())
|
|
runner.test("still updates style directive", test_still_updates_style_directive())
|
|
runner.test("history included in context", test_history_included_in_context())
|
|
runner.test("bad JSON returns fallback", test_bad_json_returns_fallback())
|
|
p, f = runner.summary()
|
|
print(f"\n {p} passed, {f} failed")
|