"""Unit tests for ThinkerNode v1 — reasoning, tool calls, audit."""

from harness import HudCapture, make_command, make_history, NodeTestRunner
from agent.nodes.thinker_v1 import ThinkerNode
from agent.process import ProcessManager


def make_thinker():
    """Build a ThinkerNode wired to a fresh HUD capture.

    The same HudCapture instance is handed to both the ProcessManager and
    the ThinkerNode as their ``send_hud`` argument.

    Returns:
        A ``(node, hud)`` tuple: the ThinkerNode under test and the
        HudCapture that was passed to it.
    """
    hud = HudCapture()
    pm = ProcessManager(send_hud=hud)
    node = ThinkerNode(send_hud=hud, process_manager=pm)
    return node, hud


async def test_simple_response() -> None:
    """Thinker produces a text response or tool call for a simple question."""
    node, _ = make_thinker()
    cmd = make_command(intent="question", topic="greeting", text="say hello to me")
    thought = await node.process(cmd, [], memory_context="")
    # Any one of the three output channels counts as "the node did something".
    has_output = bool(thought.response) or bool(thought.actions) or bool(thought.tool_used)
    assert has_output, "no response, no actions, no tool used"


async def test_no_code_in_response() -> None:
    """Response should not contain code blocks (stripped by _strip_code_blocks)."""
    node, _ = make_thinker()
    cmd = make_command(
        intent="request",
        topic="create buttons",
        text="create two buttons: red and blue",
    )
    thought = await node.process(cmd, [], memory_context="")
    # A falsy response (None / "") trivially contains no code fence; normalise
    # it so the check fails with an assertion message instead of a TypeError.
    response = thought.response or ""
    assert "```" not in response, f"code block leaked: {response[:100]}"


async def test_emits_tool_calls_for_buttons() -> None:
    """When asked to create buttons, Thinker should call emit_actions."""
    node, _ = make_thinker()
    cmd = make_command(
        intent="request",
        topic="create buttons",
        text="create two buttons: Alpha and Beta",
    )
    thought = await node.process(cmd, [], memory_context="")
    assert thought.actions, "no actions emitted"
    # `or ""` also covers an action whose "label" key is present but None.
    labels = [(a.get("label") or "").lower() for a in thought.actions]
    assert any("alpha" in label for label in labels), f"no Alpha button: {labels}"


async def test_query_db_called() -> None:
    """When asked about database, Thinker should call query_db."""
    node, hud = make_thinker()
    cmd = make_command(
        intent="request",
        topic="database customers",
        text="how many customers are in the database?",
    )
    thought = await node.process(cmd, [], memory_context="")
    # Accept either the recorded tool name or a "tool_call" HUD event.
    assert thought.tool_used == "query_db" or hud.has("tool_call"), (
        f"tool_used={thought.tool_used}, "
        f"hud events: {[e.get('event') for e in hud.events]}"
    )


async def test_s3_audit_code_without_tools() -> None:
    """S3* audit should fire when code is written without tool calls.

    This is a smoke test with no assertions: it only fails if
    ``node.process`` raises. Which path was taken is printed for the
    person reading the run output.
    """
    node, hud = make_thinker()
    # This is hard to trigger deterministically — we check the audit mechanism
    # exists by verifying the HUD capture works.
    cmd = make_command(
        intent="request",
        topic="create machine",
        text="create a state machine called test with states a and b",
    )
    thought = await node.process(cmd, [], memory_context="")
    # If S3* fired, there will be an s3_audit event.
    audit_events = hud.find("s3_audit")
    # Either S3* fired (model wrote code) or model called tools correctly — both OK.
    if audit_events:
        print(f" S3* fired: {audit_events[0].get('detail', '')[:80]}")
    elif thought.machine_ops:
        print(f" Tools called directly: {len(thought.machine_ops)} machine ops")


async def test_decided_hud_emitted() -> None:
    """Thinker should emit a 'decided' HUD event."""
    node, hud = make_thinker()
    cmd = make_command(intent="question", text="hello")
    await node.process(cmd, [], memory_context="")
    assert hud.has("decided"), f"no decided event: {[e.get('event') for e in hud.events]}"


if __name__ == "__main__":
    # Each test coroutine is created here and handed to the runner un-awaited.
    runner = NodeTestRunner()
    print("\n=== ThinkerNode v1 ===")
    runner.test("simple response", test_simple_response())
    runner.test("no code in response", test_no_code_in_response())
    runner.test("emits tool calls for buttons", test_emits_tool_calls_for_buttons())
    runner.test("query_db called for DB question", test_query_db_called())
    runner.test("S3* audit mechanism", test_s3_audit_code_without_tools())
    runner.test("decided HUD emitted", test_decided_hud_emitted())
    p, f = runner.summary()
    print(f"\n {p} passed, {f} failed")