Split 1161-line monolith into agent/ package: auth, llm, types, process, runtime, api, and nodes/ (base, sensor, input, output, thinker, memorizer). No logic changes — pure structural split. uvicorn agent:app entrypoint unchanged. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
64 lines · 2.3 KiB · Python
"""Output Node: streams natural response to the user."""
|
|
|
|
import json
|
|
import logging
|
|
|
|
from fastapi import WebSocket
|
|
|
|
from .base import Node
|
|
from ..llm import llm_call
|
|
from ..types import Command
|
|
|
|
log = logging.getLogger("runtime")
|
|
|
|
|
|
class OutputNode(Node):
    """Output node: streams the model's natural-language reply to the user.

    Receives the Input node's perception of the user's message (as a
    ``Command``), assembles a chat context from recent history plus any
    memory context, and streams the LLM response token-by-token over the
    WebSocket as ``delta`` frames, closing with a ``done`` frame.
    """

    name = "output"
    model = "google/gemini-2.0-flash-001"
    max_context_tokens = 4000

    SYSTEM = """You are the Output node — the voice of this cognitive runtime.
The Input node sends you its perception of what the user said. This is internal context for you — never repeat or echo it.
You respond to the USER, not to the Input node. Use the perception to understand intent, then act on it.
Be natural. Be concise. If the user asks you to do something, do it — don't describe what you're about to do.

{memory_context}"""

    async def process(self, command: Command, history: list[dict], ws: WebSocket, memory_context: str = "") -> str:
        """Stream a reply for *command* over *ws* and return the full text.

        Args:
            command: Routed command whose ``instruction`` carries the Input
                node's perception of the user's message.
            history: Prior chat messages; only the last 20 are included
                before trimming to the context budget.
            ws: Open WebSocket to the user; receives ``delta`` frames per
                token and a final ``done`` frame.
            memory_context: Optional memory block interpolated into the
                system prompt (empty string disables it).

        Returns:
            The complete concatenated response text.
        """
        await self.hud("streaming")

        messages = [
            {"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
        ]
        # Idiomatic bulk add instead of appending one message per loop iteration.
        messages.extend(history[-20:])
        messages.append({"role": "system", "content": f"Input perception: {command.instruction}"})
        messages = self.trim_context(messages)

        await self.hud("context", messages=messages, tokens=self.last_context_tokens,
                       max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct)

        client, resp = await llm_call(self.model, messages, stream=True)
        # Accumulate tokens in a list and join once — avoids quadratic
        # str += in the streaming loop.
        parts: list[str] = []
        try:
            # SSE stream: each "data: " line carries one JSON chunk.
            async for line in resp.aiter_lines():
                if not line.startswith("data: "):
                    continue  # skip keep-alive comments / blank lines
                payload = line[6:]
                if payload == "[DONE]":
                    break
                chunk = json.loads(payload)
                delta = chunk["choices"][0].get("delta", {})
                token = delta.get("content", "")
                if token:
                    parts.append(token)
                    await ws.send_text(json.dumps({"type": "delta", "content": token}))
        finally:
            # Always release the streaming response and its client, even if
            # the socket drops or a chunk fails to parse mid-stream.
            await resp.aclose()
            await client.aclose()

        full_response = "".join(parts)
        # Lazy %-formatting: the slice/format only happens if INFO is enabled.
        log.info("[output] response: %s...", full_response[:100])
        await ws.send_text(json.dumps({"type": "done"}))
        await self.hud("done")
        return full_response
|