v0.14.4: Interpreter wired in v2, tool_call convention, Haiku models, UI fix

- Wire Interpreter into v2 pipeline (after Thinker tool_output, before Output)
- Rename tool_exec -> tool_call everywhere (consistent convention across v1/v2)
- Switch Director v1+v2 to anthropic/claude-haiku-4.5 (was opus, reserved)
- Fix UI apply_machine_ops crash when states are strings instead of dicts
- Fix runtime_test.py async poll to match on message ID (prevent stale results)
- Add traceback to pipeline error logging

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Nico 2026-03-29 06:06:13 +02:00
parent da92109550
commit 4c412d3c4b
10 changed files with 57 additions and 955 deletions

View File

@@ -40,7 +40,7 @@ def _broadcast_sse(event: dict):
if _pipeline_result.get("status") == "running": if _pipeline_result.get("status") == "running":
node = event.get("node", "") node = event.get("node", "")
evt = event.get("event", "") evt = event.get("event", "")
if node and evt in ("thinking", "perceived", "decided", "streaming", "tool_exec", "interpreted", "updated"): if node and evt in ("thinking", "perceived", "decided", "streaming", "tool_call", "interpreted", "updated"):
_pipeline_result["stage"] = node _pipeline_result["stage"] = node
_pipeline_result["event"] = evt _pipeline_result["event"] = evt
@@ -199,7 +199,8 @@ def register_routes(app):
"memorizer": runtime.memorizer.state, "memorizer": runtime.memorizer.state,
} }
except Exception as e: except Exception as e:
log.error(f"[api] pipeline error: {e}") import traceback
log.error(f"[api] pipeline error: {e}\n{traceback.format_exc()}")
_pipeline_result = { _pipeline_result = {
"status": "error", "status": "error",
"id": msg_id, "id": msg_id,

View File

@@ -12,7 +12,7 @@ log = logging.getLogger("runtime")
class DirectorNode(Node): class DirectorNode(Node):
name = "director" name = "director"
model = "google/gemini-2.0-flash-001" model = "google/gemini-2.0-flash-001"
plan_model = "anthropic/claude-opus-4" # Smart model for investigation planning plan_model = "anthropic/claude-haiku-4.5" # Smart model for investigation planning
max_context_tokens = 2000 max_context_tokens = 2000
SYSTEM = """You are the Director node — the strategist of this cognitive runtime. SYSTEM = """You are the Director node — the strategist of this cognitive runtime.

View File

@@ -12,7 +12,7 @@ log = logging.getLogger("runtime")
class DirectorV2Node(Node): class DirectorV2Node(Node):
name = "director_v2" name = "director_v2"
model = "anthropic/claude-opus-4" model = "anthropic/claude-haiku-4.5"
max_context_tokens = 4000 max_context_tokens = 4000
SYSTEM = """You are the Director — the brain of this cognitive agent runtime. SYSTEM = """You are the Director — the brain of this cognitive agent runtime.

View File

@@ -79,7 +79,7 @@ Rules:
for step in plan.tool_sequence: for step in plan.tool_sequence:
tool = step.get("tool", "") tool = step.get("tool", "")
args = step.get("args", {}) args = step.get("args", {})
await self.hud("tool_exec", tool=tool, args=args) await self.hud("tool_call", tool=tool, args=args)
if tool == "emit_actions": if tool == "emit_actions":
actions.extend(args.get("actions", [])) actions.extend(args.get("actions", []))

View File

@@ -34,6 +34,8 @@ class UINode(Node):
states_list = op_data.get("states", []) states_list = op_data.get("states", [])
states = {} states = {}
for s in states_list: for s in states_list:
if isinstance(s, str):
s = {"name": s}
name = s.get("name", "") name = s.get("name", "")
if name: if name:
states[name] = { states[name] = {

View File

@@ -219,6 +219,10 @@ class Runtime:
if self.is_v2: if self.is_v2:
plan = await self.director.decide(command, self.history, memory_context=mem_ctx) plan = await self.director.decide(command, self.history, memory_context=mem_ctx)
thought = await self.thinker.process(command, plan, self.history, memory_context=mem_ctx) thought = await self.thinker.process(command, plan, self.history, memory_context=mem_ctx)
if self.interpreter and thought.tool_used and thought.tool_output:
interpreted = await self.interpreter.interpret(
thought.tool_used, thought.tool_output, action_desc)
thought.response = interpreted.summary
else: else:
thought = await self.thinker.process(command, self.history, memory_context=mem_ctx) thought = await self.thinker.process(command, self.history, memory_context=mem_ctx)
@@ -335,9 +339,15 @@ class Runtime:
return return
if self.is_v2: if self.is_v2:
# v2 flow: Director decides, Thinker executes # v2 flow: Director decides, Thinker executes, Interpreter reads results
plan = await self.director.decide(command, self.history, memory_context=mem_ctx) plan = await self.director.decide(command, self.history, memory_context=mem_ctx)
thought = await self.thinker.process(command, plan, self.history, memory_context=mem_ctx) thought = await self.thinker.process(command, plan, self.history, memory_context=mem_ctx)
# Interpreter: factual summary of tool results (no hallucination)
if self.interpreter and thought.tool_used and thought.tool_output:
interpreted = await self.interpreter.interpret(
thought.tool_used, thought.tool_output, text)
# Replace thinker's response with interpreter's factual summary
thought.response = interpreted.summary
else: else:
# v1 flow: optional Director pre-planning for complex requests # v1 flow: optional Director pre-planning for complex requests
is_complex = command.analysis.complexity == "complex" is_complex = command.analysis.complexity == "complex"

View File

@@ -153,6 +153,19 @@ class CogClient:
body["dashboard"] = dashboard body["dashboard"] = dashboard
r = self.client.post(f"{API}/send", json=body, headers=HEADERS) r = self.client.post(f"{API}/send", json=body, headers=HEADERS)
d = r.json() d = r.json()
# Async send: poll for result, match on message ID
if d.get("status") == "queued":
msg_id = d.get("id", "")
for _ in range(120):
time.sleep(0.5)
pr = self.client.get(f"{API}/result", headers=HEADERS)
pd = pr.json()
if pd.get("id") == msg_id and pd.get("status") == "done":
d = pd
break
if pd.get("id") == msg_id and pd.get("status") == "error":
d = pd
break
self.last_response = d.get("response", "") self.last_response = d.get("response", "")
self.last_memo = d.get("memorizer", {}) self.last_memo = d.get("memorizer", {})
time.sleep(0.5) time.sleep(0.5)
@@ -477,13 +490,22 @@ def run_standalone(paths: list[Path] = None):
if not paths: if not paths:
paths = sorted(Path("testcases").glob("*.md")) paths = sorted(Path("testcases").glob("*.md"))
# Count total steps across all testcases for frontend progress
all_tcs = [parse_testcase(p) for p in paths]
total_steps = sum(len(s["commands"]) for tc in all_tcs for s in tc["steps"])
first_suite = True
all_results = {} all_results = {}
for path in paths: for tc in all_tcs:
tc = parse_testcase(path) path = tc["file"]
print(f"\n{'='*60}") print(f"\n{'='*60}")
print(f" {tc['name']}") print(f" {tc['name']}")
print(f"{'='*60}") print(f"{'='*60}")
_push_status("suite_start", suite=tc["name"]) if first_suite:
_push_status("suite_start", suite=tc["name"], count=total_steps)
first_suite = False
else:
_push_status("suite_start", suite=tc["name"])
runner = CogTestRunner() runner = CogTestRunner()
results = runner.run(tc) results = runner.run(tc)

View File

@@ -397,7 +397,6 @@ function graphAnimate(event, node) {
if (node) pulseNode(node); if (node) pulseNode(node);
break; break;
case 'tool_call': case 'tool_call':
case 'tool_exec':
pulseNode(node || 'thinker'); flashEdge('thinker', 'ui'); pulseNode(node || 'thinker'); flashEdge('thinker', 'ui');
break; break;
case 'tool_result': case 'tool_result':

View File

@@ -183,8 +183,8 @@ async def test_emits_hud_per_tool():
with patch("agent.nodes.thinker_v2.llm_call", side_effect=mock_llm): with patch("agent.nodes.thinker_v2.llm_call", side_effect=mock_llm):
await node.process(cmd, plan, [], memory_context="") await node.process(cmd, plan, [], memory_context="")
tool_events = hud.find("tool_exec") tool_events = hud.find("tool_call")
assert len(tool_events) >= 2, f"expected 2+ tool_exec events, got {len(tool_events)}" assert len(tool_events) >= 2, f"expected 2+ tool_call events, got {len(tool_events)}"
async def test_create_machine_tool(): async def test_create_machine_tool():

File diff suppressed because it is too large Load Diff