v0.14.4: Interpreter wired in v2, tool_call convention, Haiku models, UI fix
- Wire Interpreter into v2 pipeline (after Thinker tool_output, before Output)
- Rename tool_exec -> tool_call everywhere (consistent convention across v1/v2)
- Switch Director v1+v2 to anthropic/claude-haiku-4.5 (was opus, reserved)
- Fix UI apply_machine_ops crash when states are strings instead of dicts
- Fix runtime_test.py async poll to match on message ID (prevent stale results)
- Add traceback to pipeline error logging

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
da92109550
commit
4c412d3c4b
@ -40,7 +40,7 @@ def _broadcast_sse(event: dict):
|
||||
if _pipeline_result.get("status") == "running":
|
||||
node = event.get("node", "")
|
||||
evt = event.get("event", "")
|
||||
if node and evt in ("thinking", "perceived", "decided", "streaming", "tool_exec", "interpreted", "updated"):
|
||||
if node and evt in ("thinking", "perceived", "decided", "streaming", "tool_call", "interpreted", "updated"):
|
||||
_pipeline_result["stage"] = node
|
||||
_pipeline_result["event"] = evt
|
||||
|
||||
@ -199,7 +199,8 @@ def register_routes(app):
|
||||
"memorizer": runtime.memorizer.state,
|
||||
}
|
||||
except Exception as e:
|
||||
log.error(f"[api] pipeline error: {e}")
|
||||
import traceback
|
||||
log.error(f"[api] pipeline error: {e}\n{traceback.format_exc()}")
|
||||
_pipeline_result = {
|
||||
"status": "error",
|
||||
"id": msg_id,
|
||||
|
||||
@ -12,7 +12,7 @@ log = logging.getLogger("runtime")
|
||||
class DirectorNode(Node):
|
||||
name = "director"
|
||||
model = "google/gemini-2.0-flash-001"
|
||||
plan_model = "anthropic/claude-opus-4" # Smart model for investigation planning
|
||||
plan_model = "anthropic/claude-haiku-4.5" # Smart model for investigation planning
|
||||
max_context_tokens = 2000
|
||||
|
||||
SYSTEM = """You are the Director node — the strategist of this cognitive runtime.
|
||||
|
||||
@ -12,7 +12,7 @@ log = logging.getLogger("runtime")
|
||||
|
||||
class DirectorV2Node(Node):
|
||||
name = "director_v2"
|
||||
model = "anthropic/claude-opus-4"
|
||||
model = "anthropic/claude-haiku-4.5"
|
||||
max_context_tokens = 4000
|
||||
|
||||
SYSTEM = """You are the Director — the brain of this cognitive agent runtime.
|
||||
|
||||
@ -79,7 +79,7 @@ Rules:
|
||||
for step in plan.tool_sequence:
|
||||
tool = step.get("tool", "")
|
||||
args = step.get("args", {})
|
||||
await self.hud("tool_exec", tool=tool, args=args)
|
||||
await self.hud("tool_call", tool=tool, args=args)
|
||||
|
||||
if tool == "emit_actions":
|
||||
actions.extend(args.get("actions", []))
|
||||
|
||||
@ -34,6 +34,8 @@ class UINode(Node):
|
||||
states_list = op_data.get("states", [])
|
||||
states = {}
|
||||
for s in states_list:
|
||||
if isinstance(s, str):
|
||||
s = {"name": s}
|
||||
name = s.get("name", "")
|
||||
if name:
|
||||
states[name] = {
|
||||
|
||||
@ -219,6 +219,10 @@ class Runtime:
|
||||
if self.is_v2:
|
||||
plan = await self.director.decide(command, self.history, memory_context=mem_ctx)
|
||||
thought = await self.thinker.process(command, plan, self.history, memory_context=mem_ctx)
|
||||
if self.interpreter and thought.tool_used and thought.tool_output:
|
||||
interpreted = await self.interpreter.interpret(
|
||||
thought.tool_used, thought.tool_output, action_desc)
|
||||
thought.response = interpreted.summary
|
||||
else:
|
||||
thought = await self.thinker.process(command, self.history, memory_context=mem_ctx)
|
||||
|
||||
@ -335,9 +339,15 @@ class Runtime:
|
||||
return
|
||||
|
||||
if self.is_v2:
|
||||
# v2 flow: Director decides, Thinker executes
|
||||
# v2 flow: Director decides, Thinker executes, Interpreter reads results
|
||||
plan = await self.director.decide(command, self.history, memory_context=mem_ctx)
|
||||
thought = await self.thinker.process(command, plan, self.history, memory_context=mem_ctx)
|
||||
# Interpreter: factual summary of tool results (no hallucination)
|
||||
if self.interpreter and thought.tool_used and thought.tool_output:
|
||||
interpreted = await self.interpreter.interpret(
|
||||
thought.tool_used, thought.tool_output, text)
|
||||
# Replace thinker's response with interpreter's factual summary
|
||||
thought.response = interpreted.summary
|
||||
else:
|
||||
# v1 flow: optional Director pre-planning for complex requests
|
||||
is_complex = command.analysis.complexity == "complex"
|
||||
|
||||
@ -153,6 +153,19 @@ class CogClient:
|
||||
body["dashboard"] = dashboard
|
||||
r = self.client.post(f"{API}/send", json=body, headers=HEADERS)
|
||||
d = r.json()
|
||||
# Async send: poll for result, match on message ID
|
||||
if d.get("status") == "queued":
|
||||
msg_id = d.get("id", "")
|
||||
for _ in range(120):
|
||||
time.sleep(0.5)
|
||||
pr = self.client.get(f"{API}/result", headers=HEADERS)
|
||||
pd = pr.json()
|
||||
if pd.get("id") == msg_id and pd.get("status") == "done":
|
||||
d = pd
|
||||
break
|
||||
if pd.get("id") == msg_id and pd.get("status") == "error":
|
||||
d = pd
|
||||
break
|
||||
self.last_response = d.get("response", "")
|
||||
self.last_memo = d.get("memorizer", {})
|
||||
time.sleep(0.5)
|
||||
@ -477,13 +490,22 @@ def run_standalone(paths: list[Path] = None):
|
||||
if not paths:
|
||||
paths = sorted(Path("testcases").glob("*.md"))
|
||||
|
||||
# Count total steps across all testcases for frontend progress
|
||||
all_tcs = [parse_testcase(p) for p in paths]
|
||||
total_steps = sum(len(s["commands"]) for tc in all_tcs for s in tc["steps"])
|
||||
first_suite = True
|
||||
|
||||
all_results = {}
|
||||
for path in paths:
|
||||
tc = parse_testcase(path)
|
||||
for tc in all_tcs:
|
||||
path = tc["file"]
|
||||
print(f"\n{'='*60}")
|
||||
print(f" {tc['name']}")
|
||||
print(f"{'='*60}")
|
||||
_push_status("suite_start", suite=tc["name"])
|
||||
if first_suite:
|
||||
_push_status("suite_start", suite=tc["name"], count=total_steps)
|
||||
first_suite = False
|
||||
else:
|
||||
_push_status("suite_start", suite=tc["name"])
|
||||
|
||||
runner = CogTestRunner()
|
||||
results = runner.run(tc)
|
||||
|
||||
@ -397,7 +397,6 @@ function graphAnimate(event, node) {
|
||||
if (node) pulseNode(node);
|
||||
break;
|
||||
case 'tool_call':
|
||||
case 'tool_exec':
|
||||
pulseNode(node || 'thinker'); flashEdge('thinker', 'ui');
|
||||
break;
|
||||
case 'tool_result':
|
||||
|
||||
@ -183,8 +183,8 @@ async def test_emits_hud_per_tool():
|
||||
|
||||
with patch("agent.nodes.thinker_v2.llm_call", side_effect=mock_llm):
|
||||
await node.process(cmd, plan, [], memory_context="")
|
||||
tool_events = hud.find("tool_exec")
|
||||
assert len(tool_events) >= 2, f"expected 2+ tool_exec events, got {len(tool_events)}"
|
||||
tool_events = hud.find("tool_call")
|
||||
assert len(tool_events) >= 2, f"expected 2+ tool_call events, got {len(tool_events)}"
|
||||
|
||||
|
||||
async def test_create_machine_tool():
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user