v0.14.4: Interpreter wired in v2, tool_call convention, Haiku models, UI fix
- Wire Interpreter into v2 pipeline (after Thinker tool_output, before Output)
- Rename tool_exec -> tool_call everywhere (consistent convention across v1/v2)
- Switch Director v1+v2 to anthropic/claude-haiku-4.5 (was opus, reserved)
- Fix UI apply_machine_ops crash when states are strings instead of dicts
- Fix runtime_test.py async poll to match on message ID (prevent stale results)
- Add traceback to pipeline error logging

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
da92109550
commit
4c412d3c4b
@ -40,7 +40,7 @@ def _broadcast_sse(event: dict):
|
|||||||
if _pipeline_result.get("status") == "running":
|
if _pipeline_result.get("status") == "running":
|
||||||
node = event.get("node", "")
|
node = event.get("node", "")
|
||||||
evt = event.get("event", "")
|
evt = event.get("event", "")
|
||||||
if node and evt in ("thinking", "perceived", "decided", "streaming", "tool_exec", "interpreted", "updated"):
|
if node and evt in ("thinking", "perceived", "decided", "streaming", "tool_call", "interpreted", "updated"):
|
||||||
_pipeline_result["stage"] = node
|
_pipeline_result["stage"] = node
|
||||||
_pipeline_result["event"] = evt
|
_pipeline_result["event"] = evt
|
||||||
|
|
||||||
@ -199,7 +199,8 @@ def register_routes(app):
|
|||||||
"memorizer": runtime.memorizer.state,
|
"memorizer": runtime.memorizer.state,
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error(f"[api] pipeline error: {e}")
|
import traceback
|
||||||
|
log.error(f"[api] pipeline error: {e}\n{traceback.format_exc()}")
|
||||||
_pipeline_result = {
|
_pipeline_result = {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"id": msg_id,
|
"id": msg_id,
|
||||||
|
|||||||
@ -12,7 +12,7 @@ log = logging.getLogger("runtime")
|
|||||||
class DirectorNode(Node):
|
class DirectorNode(Node):
|
||||||
name = "director"
|
name = "director"
|
||||||
model = "google/gemini-2.0-flash-001"
|
model = "google/gemini-2.0-flash-001"
|
||||||
plan_model = "anthropic/claude-opus-4" # Smart model for investigation planning
|
plan_model = "anthropic/claude-haiku-4.5" # Smart model for investigation planning
|
||||||
max_context_tokens = 2000
|
max_context_tokens = 2000
|
||||||
|
|
||||||
SYSTEM = """You are the Director node — the strategist of this cognitive runtime.
|
SYSTEM = """You are the Director node — the strategist of this cognitive runtime.
|
||||||
|
|||||||
@ -12,7 +12,7 @@ log = logging.getLogger("runtime")
|
|||||||
|
|
||||||
class DirectorV2Node(Node):
|
class DirectorV2Node(Node):
|
||||||
name = "director_v2"
|
name = "director_v2"
|
||||||
model = "anthropic/claude-opus-4"
|
model = "anthropic/claude-haiku-4.5"
|
||||||
max_context_tokens = 4000
|
max_context_tokens = 4000
|
||||||
|
|
||||||
SYSTEM = """You are the Director — the brain of this cognitive agent runtime.
|
SYSTEM = """You are the Director — the brain of this cognitive agent runtime.
|
||||||
|
|||||||
@ -79,7 +79,7 @@ Rules:
|
|||||||
for step in plan.tool_sequence:
|
for step in plan.tool_sequence:
|
||||||
tool = step.get("tool", "")
|
tool = step.get("tool", "")
|
||||||
args = step.get("args", {})
|
args = step.get("args", {})
|
||||||
await self.hud("tool_exec", tool=tool, args=args)
|
await self.hud("tool_call", tool=tool, args=args)
|
||||||
|
|
||||||
if tool == "emit_actions":
|
if tool == "emit_actions":
|
||||||
actions.extend(args.get("actions", []))
|
actions.extend(args.get("actions", []))
|
||||||
|
|||||||
@ -34,6 +34,8 @@ class UINode(Node):
|
|||||||
states_list = op_data.get("states", [])
|
states_list = op_data.get("states", [])
|
||||||
states = {}
|
states = {}
|
||||||
for s in states_list:
|
for s in states_list:
|
||||||
|
if isinstance(s, str):
|
||||||
|
s = {"name": s}
|
||||||
name = s.get("name", "")
|
name = s.get("name", "")
|
||||||
if name:
|
if name:
|
||||||
states[name] = {
|
states[name] = {
|
||||||
|
|||||||
@ -219,6 +219,10 @@ class Runtime:
|
|||||||
if self.is_v2:
|
if self.is_v2:
|
||||||
plan = await self.director.decide(command, self.history, memory_context=mem_ctx)
|
plan = await self.director.decide(command, self.history, memory_context=mem_ctx)
|
||||||
thought = await self.thinker.process(command, plan, self.history, memory_context=mem_ctx)
|
thought = await self.thinker.process(command, plan, self.history, memory_context=mem_ctx)
|
||||||
|
if self.interpreter and thought.tool_used and thought.tool_output:
|
||||||
|
interpreted = await self.interpreter.interpret(
|
||||||
|
thought.tool_used, thought.tool_output, action_desc)
|
||||||
|
thought.response = interpreted.summary
|
||||||
else:
|
else:
|
||||||
thought = await self.thinker.process(command, self.history, memory_context=mem_ctx)
|
thought = await self.thinker.process(command, self.history, memory_context=mem_ctx)
|
||||||
|
|
||||||
@ -335,9 +339,15 @@ class Runtime:
|
|||||||
return
|
return
|
||||||
|
|
||||||
if self.is_v2:
|
if self.is_v2:
|
||||||
# v2 flow: Director decides, Thinker executes
|
# v2 flow: Director decides, Thinker executes, Interpreter reads results
|
||||||
plan = await self.director.decide(command, self.history, memory_context=mem_ctx)
|
plan = await self.director.decide(command, self.history, memory_context=mem_ctx)
|
||||||
thought = await self.thinker.process(command, plan, self.history, memory_context=mem_ctx)
|
thought = await self.thinker.process(command, plan, self.history, memory_context=mem_ctx)
|
||||||
|
# Interpreter: factual summary of tool results (no hallucination)
|
||||||
|
if self.interpreter and thought.tool_used and thought.tool_output:
|
||||||
|
interpreted = await self.interpreter.interpret(
|
||||||
|
thought.tool_used, thought.tool_output, text)
|
||||||
|
# Replace thinker's response with interpreter's factual summary
|
||||||
|
thought.response = interpreted.summary
|
||||||
else:
|
else:
|
||||||
# v1 flow: optional Director pre-planning for complex requests
|
# v1 flow: optional Director pre-planning for complex requests
|
||||||
is_complex = command.analysis.complexity == "complex"
|
is_complex = command.analysis.complexity == "complex"
|
||||||
|
|||||||
@ -153,6 +153,19 @@ class CogClient:
|
|||||||
body["dashboard"] = dashboard
|
body["dashboard"] = dashboard
|
||||||
r = self.client.post(f"{API}/send", json=body, headers=HEADERS)
|
r = self.client.post(f"{API}/send", json=body, headers=HEADERS)
|
||||||
d = r.json()
|
d = r.json()
|
||||||
|
# Async send: poll for result, match on message ID
|
||||||
|
if d.get("status") == "queued":
|
||||||
|
msg_id = d.get("id", "")
|
||||||
|
for _ in range(120):
|
||||||
|
time.sleep(0.5)
|
||||||
|
pr = self.client.get(f"{API}/result", headers=HEADERS)
|
||||||
|
pd = pr.json()
|
||||||
|
if pd.get("id") == msg_id and pd.get("status") == "done":
|
||||||
|
d = pd
|
||||||
|
break
|
||||||
|
if pd.get("id") == msg_id and pd.get("status") == "error":
|
||||||
|
d = pd
|
||||||
|
break
|
||||||
self.last_response = d.get("response", "")
|
self.last_response = d.get("response", "")
|
||||||
self.last_memo = d.get("memorizer", {})
|
self.last_memo = d.get("memorizer", {})
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
@ -477,13 +490,22 @@ def run_standalone(paths: list[Path] = None):
|
|||||||
if not paths:
|
if not paths:
|
||||||
paths = sorted(Path("testcases").glob("*.md"))
|
paths = sorted(Path("testcases").glob("*.md"))
|
||||||
|
|
||||||
|
# Count total steps across all testcases for frontend progress
|
||||||
|
all_tcs = [parse_testcase(p) for p in paths]
|
||||||
|
total_steps = sum(len(s["commands"]) for tc in all_tcs for s in tc["steps"])
|
||||||
|
first_suite = True
|
||||||
|
|
||||||
all_results = {}
|
all_results = {}
|
||||||
for path in paths:
|
for tc in all_tcs:
|
||||||
tc = parse_testcase(path)
|
path = tc["file"]
|
||||||
print(f"\n{'='*60}")
|
print(f"\n{'='*60}")
|
||||||
print(f" {tc['name']}")
|
print(f" {tc['name']}")
|
||||||
print(f"{'='*60}")
|
print(f"{'='*60}")
|
||||||
_push_status("suite_start", suite=tc["name"])
|
if first_suite:
|
||||||
|
_push_status("suite_start", suite=tc["name"], count=total_steps)
|
||||||
|
first_suite = False
|
||||||
|
else:
|
||||||
|
_push_status("suite_start", suite=tc["name"])
|
||||||
|
|
||||||
runner = CogTestRunner()
|
runner = CogTestRunner()
|
||||||
results = runner.run(tc)
|
results = runner.run(tc)
|
||||||
|
|||||||
@ -397,7 +397,6 @@ function graphAnimate(event, node) {
|
|||||||
if (node) pulseNode(node);
|
if (node) pulseNode(node);
|
||||||
break;
|
break;
|
||||||
case 'tool_call':
|
case 'tool_call':
|
||||||
case 'tool_exec':
|
|
||||||
pulseNode(node || 'thinker'); flashEdge('thinker', 'ui');
|
pulseNode(node || 'thinker'); flashEdge('thinker', 'ui');
|
||||||
break;
|
break;
|
||||||
case 'tool_result':
|
case 'tool_result':
|
||||||
|
|||||||
@ -183,8 +183,8 @@ async def test_emits_hud_per_tool():
|
|||||||
|
|
||||||
with patch("agent.nodes.thinker_v2.llm_call", side_effect=mock_llm):
|
with patch("agent.nodes.thinker_v2.llm_call", side_effect=mock_llm):
|
||||||
await node.process(cmd, plan, [], memory_context="")
|
await node.process(cmd, plan, [], memory_context="")
|
||||||
tool_events = hud.find("tool_exec")
|
tool_events = hud.find("tool_call")
|
||||||
assert len(tool_events) >= 2, f"expected 2+ tool_exec events, got {len(tool_events)}"
|
assert len(tool_events) >= 2, f"expected 2+ tool_call events, got {len(tool_events)}"
|
||||||
|
|
||||||
|
|
||||||
async def test_create_machine_tool():
|
async def test_create_machine_tool():
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user