cognitive agent runtime v0.4.6: 3-node graph + Zitadel auth + K3s deploy

- Input/Output/Memorizer nodes with OpenRouter (Gemini Flash)
- Zitadel OIDC auth with PKCE flow, service token for Titan
- SSE event stream + poll endpoint for external observers
- Identity from Zitadel userinfo, listener context in Input prompt
- Trace logging to file + SSE broadcast
- K3s deployment on IONOS with Let's Encrypt TLS
- Frontend: chat + trace view, OIDC login

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Nico 2026-03-27 23:21:51 +01:00
commit 569a6022fe
13 changed files with 1574 additions and 0 deletions

5
.dockerignore Normal file
View File

@ -0,0 +1,5 @@
.venv/
__pycache__/
*.pyc
.env
trace.jsonl

5
.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
.venv/
__pycache__/
*.pyc
.env
trace.jsonl

12
Dockerfile Normal file
View File

@ -0,0 +1,12 @@
FROM python:3.12-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 8000
CMD ["uvicorn", "agent:app", "--host", "0.0.0.0", "--port", "8000"]

596
agent.py Normal file
View File

@ -0,0 +1,596 @@
"""
Cognitive Agent Runtime Phase A.2: Three-node graph (Input, Output + Memorizer).
Input decides WHAT to do. Output executes and streams.
Memorizer holds shared state (S2 coordination).
"""
import asyncio
import json
import os
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
import httpx
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Depends, HTTPException, Query
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from fastapi.staticfiles import StaticFiles
from dotenv import load_dotenv
load_dotenv(Path(__file__).parent / ".env")
# --- Config ---
API_KEY = os.environ["OPENROUTER_API_KEY"]
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
# --- Auth (Zitadel OIDC) ---
ZITADEL_ISSUER = os.environ.get("ZITADEL_ISSUER", "https://auth.loop42.de")
ZITADEL_CLIENT_ID = os.environ.get("ZITADEL_CLIENT_ID", "365996029172056091")
ZITADEL_PROJECT_ID = os.environ.get("ZITADEL_PROJECT_ID", "365995955654230043")
AUTH_ENABLED = os.environ.get("AUTH_ENABLED", "false").lower() == "true"
SERVICE_TOKENS = set(filter(None, os.environ.get("SERVICE_TOKENS", "").split(",")))
_jwks_cache: dict = {"keys": [], "fetched_at": 0}
async def _get_jwks():
if time.time() - _jwks_cache["fetched_at"] < 3600:
return _jwks_cache["keys"]
async with httpx.AsyncClient() as client:
resp = await client.get(f"{ZITADEL_ISSUER}/oauth/v2/keys")
_jwks_cache["keys"] = resp.json()["keys"]
_jwks_cache["fetched_at"] = time.time()
return _jwks_cache["keys"]
async def _validate_token(token: str) -> dict:
"""Validate token: check service tokens, then JWT, then introspection."""
import base64
# Check static service tokens (for machine accounts like titan)
if token in SERVICE_TOKENS:
return {"sub": "titan", "username": "titan", "source": "service_token"}
# Try JWT validation first
try:
parts = token.split(".")
if len(parts) == 3:
keys = await _get_jwks()
header_b64 = parts[0] + "=" * (4 - len(parts[0]) % 4)
header = json.loads(base64.urlsafe_b64decode(header_b64))
kid = header.get("kid")
key = next((k for k in keys if k["kid"] == kid), None)
if key:
import jwt as pyjwt
from jwt import PyJWK
jwk_obj = PyJWK(key)
claims = pyjwt.decode(
token, jwk_obj.key, algorithms=["RS256"],
issuer=ZITADEL_ISSUER, options={"verify_aud": False},
)
return claims
except Exception:
pass
# Fall back to introspection (for opaque access tokens)
# Zitadel requires client_id + client_secret or JWT profile for introspection
# For a public SPA client, use the project's API app instead
# Simplest: check via userinfo endpoint with the token
async with httpx.AsyncClient() as client:
resp = await client.get(
f"{ZITADEL_ISSUER}/oidc/v1/userinfo",
headers={"Authorization": f"Bearer {token}"},
)
if resp.status_code == 200:
info = resp.json()
log.info(f"[auth] userinfo response: {info}")
return {"sub": info.get("sub"), "preferred_username": info.get("preferred_username"),
"email": info.get("email"), "name": info.get("name"), "source": "userinfo"}
raise HTTPException(status_code=401, detail="Invalid token")
_bearer = HTTPBearer(auto_error=False)
async def require_auth(credentials: HTTPAuthorizationCredentials | None = Depends(_bearer)):
"""Dependency: require valid JWT when AUTH_ENABLED."""
if not AUTH_ENABLED:
return {"sub": "anonymous"}
if not credentials:
raise HTTPException(status_code=401, detail="Missing token")
return await _validate_token(credentials.credentials)
async def ws_auth(token: str | None = Query(None)) -> dict:
"""Validate WebSocket token from query param."""
if not AUTH_ENABLED:
return {"sub": "anonymous"}
if not token:
return None # Will reject in ws_endpoint
return await _validate_token(token)
# --- LLM helper ---
import logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message)s", datefmt="%H:%M:%S")
log = logging.getLogger("runtime")
async def llm_call(model: str, messages: list[dict], stream: bool = False) -> Any:
"""Single LLM call via OpenRouter. Returns full text or (client, response) for streaming."""
headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
body = {"model": model, "messages": messages, "stream": stream}
client = httpx.AsyncClient(timeout=60)
if stream:
resp = await client.send(client.build_request("POST", OPENROUTER_URL, headers=headers, json=body), stream=True)
return client, resp # caller owns cleanup
resp = await client.post(OPENROUTER_URL, headers=headers, json=body)
await client.aclose()
data = resp.json()
if "choices" not in data:
log.error(f"LLM error: {data}")
return f"[LLM error: {data.get('error', {}).get('message', 'unknown')}]"
return data["choices"][0]["message"]["content"]
# --- Message types ---
@dataclass
class Envelope:
"""What flows between nodes."""
text: str
user_id: str = "anon"
session_id: str = ""
timestamp: str = ""
@dataclass
class Command:
"""Input node's decision — tells Output what to do."""
instruction: str # natural language command for Output LLM
source_text: str # original user message (Output may need it)
metadata: dict = field(default_factory=dict)
# --- Base Node ---
class Node:
name: str = "node"
model: str | None = None
def __init__(self, send_hud):
self.send_hud = send_hud # async callable to emit hud events to frontend
async def hud(self, event: str, **data):
await self.send_hud({"node": self.name, "event": event, **data})
# --- Input Node ---
class InputNode(Node):
name = "input"
model = "google/gemini-2.0-flash-001"
SYSTEM = """You are the Input node — the ear of this cognitive runtime.
Listener context:
- Authenticated user: {identity}
- Channel: {channel} (Chrome browser on Nico's Windows PC, in his room at home)
- Physical: private space, Nico lives with Tina she may use this session too
- Security: single-user account, shared physical space other voices are trusted household
You hear what comes through this channel. Emit ONE instruction sentence telling Output how to respond.
No content, just the command.
{memory_context}"""
async def process(self, envelope: Envelope, history: list[dict], memory_context: str = "",
identity: str = "unknown", channel: str = "unknown") -> Command:
await self.hud("thinking", detail="deciding how to respond")
log.info(f"[input] user said: {envelope.text}")
messages = [
{"role": "system", "content": self.SYSTEM.format(
memory_context=memory_context, identity=identity, channel=channel)},
]
# History already includes current user message — don't add it again
for msg in history[-8:]:
messages.append(msg)
await self.hud("context", messages=messages)
instruction = await llm_call(self.model, messages)
log.info(f"[input] → command: {instruction}")
await self.hud("decided", instruction=instruction)
return Command(instruction=instruction, source_text=envelope.text)
# --- Output Node ---
class OutputNode(Node):
name = "output"
model = "google/gemini-2.0-flash-001"
SYSTEM = """You are the Output node of a cognitive agent runtime.
You receive a command from the Input node telling you HOW to respond, plus the user's original message.
Follow the command's tone and intent. Be natural, don't mention the command or the runtime architecture.
Be concise.
{memory_context}"""
async def process(self, command: Command, history: list[dict], ws: WebSocket, memory_context: str = "") -> str:
await self.hud("streaming")
messages = [
{"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
]
# Conversation history for continuity (already includes current user message)
for msg in history[-20:]:
messages.append(msg)
# Inject command as system guidance after the user message
messages.append({"role": "system", "content": f"Input node command: {command.instruction}"})
await self.hud("context", messages=messages)
# Stream response
client, resp = await llm_call(self.model, messages, stream=True)
full_response = ""
try:
async for line in resp.aiter_lines():
if not line.startswith("data: "):
continue
payload = line[6:]
if payload == "[DONE]":
break
chunk = json.loads(payload)
delta = chunk["choices"][0].get("delta", {})
token = delta.get("content", "")
if token:
full_response += token
await ws.send_text(json.dumps({"type": "delta", "content": token}))
finally:
await resp.aclose()
await client.aclose()
log.info(f"[output] response: {full_response[:100]}...")
await ws.send_text(json.dumps({"type": "done"}))
await self.hud("done")
return full_response
# --- Memorizer Node (S2 — shared state / coordination) ---
class MemorizerNode(Node):
name = "memorizer"
model = "google/gemini-2.0-flash-001"
DISTILL_SYSTEM = """You are the Memorizer node of a cognitive agent runtime.
After each exchange you update the shared state that Input and Output nodes read.
Given the conversation so far, output a JSON object with these fields:
- user_name: string how the user identifies themselves (null if unknown)
- user_mood: string current emotional tone (neutral, happy, frustrated, playful, etc.)
- topic: string what the conversation is about right now
- topic_history: list of strings previous topics in this session
- situation: string social/physical context if mentioned (e.g. "at a pub with tina", "private dev session")
- language: string primary language being used (en, de, mixed)
- style_hint: string how Output should talk (casual, formal, technical, poetic, etc.)
- facts: list of strings important facts learned about the user
Output ONLY valid JSON. No explanation, no markdown fences."""
def __init__(self, send_hud):
super().__init__(send_hud)
# The shared state — starts empty, grows over conversation
self.state: dict = {
"user_name": None,
"user_mood": "neutral",
"topic": None,
"topic_history": [],
"situation": "localhost test runtime, private dev session",
"language": "en",
"style_hint": "casual, technical",
"facts": [],
}
def get_context_block(self) -> str:
"""Returns a formatted string for injection into Input/Output system prompts."""
lines = ["Shared memory (from Memorizer):"]
for k, v in self.state.items():
if v:
lines.append(f"- {k}: {v}")
return "\n".join(lines)
async def update(self, history: list[dict]):
"""Distill conversation into updated shared state. Called after each exchange."""
if len(history) < 2:
await self.hud("updated", state=self.state) # emit default state
return
await self.hud("thinking", detail="updating shared state")
messages = [
{"role": "system", "content": self.DISTILL_SYSTEM},
{"role": "system", "content": f"Current state: {json.dumps(self.state)}"},
]
# Last few exchanges for distillation
for msg in history[-10:]:
messages.append(msg)
messages.append({"role": "user", "content": "Update the shared state based on this conversation. Output JSON only."})
await self.hud("context", messages=messages)
raw = await llm_call(self.model, messages)
log.info(f"[memorizer] raw: {raw[:200]}")
# Parse JSON from response (strip markdown fences if present)
text = raw.strip()
if text.startswith("```"):
text = text.split("\n", 1)[1] if "\n" in text else text[3:]
if text.endswith("```"):
text = text[:-3]
text = text.strip()
try:
new_state = json.loads(text)
# Merge: keep old facts, add new ones
old_facts = set(self.state.get("facts", []))
new_facts = set(new_state.get("facts", []))
new_state["facts"] = list(old_facts | new_facts)
# Preserve topic history
if self.state.get("topic") and self.state["topic"] != new_state.get("topic"):
hist = new_state.get("topic_history", [])
if self.state["topic"] not in hist:
hist.append(self.state["topic"])
new_state["topic_history"] = hist[-5:] # keep last 5
self.state = new_state
log.info(f"[memorizer] updated state: {self.state}")
await self.hud("updated", state=self.state)
except (json.JSONDecodeError, Exception) as e:
log.error(f"[memorizer] update error: {e}, raw: {text[:200]}")
await self.hud("error", detail=f"Update failed: {e}")
# Still emit current state so frontend shows something
await self.hud("updated", state=self.state)
# --- Runtime (wires nodes together) ---
TRACE_FILE = Path(__file__).parent / "trace.jsonl"
class Runtime:
def __init__(self, ws: WebSocket, user_claims: dict = None, origin: str = ""):
self.ws = ws
self.history: list[dict] = []
self.input_node = InputNode(send_hud=self._send_hud)
self.output_node = OutputNode(send_hud=self._send_hud)
self.memorizer = MemorizerNode(send_hud=self._send_hud)
# Verified identity from auth — Input and Memorizer use this
claims = user_claims or {}
log.info(f"[runtime] user_claims: {claims}")
self.identity = claims.get("name") or claims.get("preferred_username") or claims.get("username") or "unknown"
log.info(f"[runtime] resolved identity: {self.identity}")
self.channel = origin or "unknown"
# Seed memorizer with verified info
self.memorizer.state["user_name"] = self.identity
self.memorizer.state["situation"] = f"authenticated on {self.channel}" if origin else "local session"
async def _send_hud(self, data: dict):
# Send to frontend
await self.ws.send_text(json.dumps({"type": "hud", **data}))
# Append to trace file + broadcast to SSE subscribers
trace_entry = {"ts": time.strftime("%Y-%m-%d %H:%M:%S.") + f"{time.time() % 1:.3f}"[2:], **data}
try:
with open(TRACE_FILE, "a", encoding="utf-8") as f:
f.write(json.dumps(trace_entry, ensure_ascii=False) + "\n")
except Exception as e:
log.error(f"trace write error: {e}")
_broadcast_sse(trace_entry)
async def handle_message(self, text: str):
envelope = Envelope(
text=text,
user_id="nico",
session_id="test",
timestamp=time.strftime("%Y-%m-%d %H:%M:%S"),
)
# Append user message to history FIRST — both nodes see it
self.history.append({"role": "user", "content": text})
# Get shared memory context for both nodes
mem_ctx = self.memorizer.get_context_block()
# Input node decides (with memory context + identity + channel)
command = await self.input_node.process(
envelope, self.history, memory_context=mem_ctx,
identity=self.identity, channel=self.channel)
# Output node executes (with memory context + history including user msg)
response = await self.output_node.process(command, self.history, self.ws, memory_context=mem_ctx)
self.history.append({"role": "assistant", "content": response})
# Memorizer updates shared state after each exchange
await self.memorizer.update(self.history)
# --- App ---
STATIC_DIR = Path(__file__).parent / "static"
app = FastAPI(title="Cognitive Agent Runtime")
# Keep a reference to the active runtime for API access
_active_runtime: Runtime | None = None
@app.get("/health")
async def health():
return {"status": "ok"}
@app.get("/auth/config")
async def auth_config():
"""Public: auth config for frontend OIDC flow."""
return {
"enabled": AUTH_ENABLED,
"issuer": ZITADEL_ISSUER,
"clientId": ZITADEL_CLIENT_ID,
"projectId": ZITADEL_PROJECT_ID,
}
@app.websocket("/ws")
async def ws_endpoint(ws: WebSocket, token: str | None = Query(None), access_token: str | None = Query(None)):
global _active_runtime
# Validate auth if enabled
user_claims = {"sub": "anonymous"}
if AUTH_ENABLED and token:
try:
user_claims = await _validate_token(token)
# If id_token lacks name, enrich from userinfo with access_token
if not user_claims.get("name") and access_token:
async with httpx.AsyncClient() as client:
resp = await client.get(f"{ZITADEL_ISSUER}/oidc/v1/userinfo",
headers={"Authorization": f"Bearer {access_token}"})
if resp.status_code == 200:
info = resp.json()
log.info(f"[auth] userinfo enrichment: {info}")
user_claims["name"] = info.get("name")
user_claims["preferred_username"] = info.get("preferred_username")
user_claims["email"] = info.get("email")
except HTTPException:
await ws.close(code=4001, reason="Invalid token")
return
origin = ws.headers.get("origin", ws.headers.get("host", ""))
await ws.accept()
runtime = Runtime(ws, user_claims=user_claims, origin=origin)
_active_runtime = runtime
try:
while True:
data = await ws.receive_text()
msg = json.loads(data)
await runtime.handle_message(msg["text"])
except WebSocketDisconnect:
if _active_runtime is runtime:
_active_runtime = None
# --- API endpoints (for Claude to inspect runtime state) ---
import hashlib
from asyncio import Queue
from starlette.responses import StreamingResponse
# SSE subscribers (for titan/service accounts to watch live)
_sse_subscribers: list[Queue] = []
def _broadcast_sse(event: dict):
"""Push an event to all SSE subscribers."""
for q in _sse_subscribers:
try:
q.put_nowait(event)
except asyncio.QueueFull:
pass # drop if subscriber is too slow
def _state_hash() -> str:
"""Hash of current runtime state — cheap way to detect changes."""
if not _active_runtime:
return "no_session"
raw = json.dumps({
"mem": _active_runtime.memorizer.state,
"hlen": len(_active_runtime.history),
}, sort_keys=True)
return hashlib.md5(raw.encode()).hexdigest()[:12]
@app.get("/api/events")
async def sse_events(user=Depends(require_auth)):
"""SSE stream of runtime events (trace, state changes)."""
q: Queue = Queue(maxsize=100)
_sse_subscribers.append(q)
async def generate():
try:
while True:
event = await q.get()
yield f"data: {json.dumps(event)}\n\n"
except asyncio.CancelledError:
pass
finally:
_sse_subscribers.remove(q)
return StreamingResponse(generate(), media_type="text/event-stream",
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
@app.get("/api/poll")
async def poll(since: str = "", user=Depends(require_auth)):
"""Returns current hash. If 'since' matches, returns {changed: false}. Cheap polling."""
h = _state_hash()
if since and since == h:
return {"changed": False, "hash": h}
return {
"changed": True,
"hash": h,
"state": _active_runtime.memorizer.state if _active_runtime else None,
"history_len": len(_active_runtime.history) if _active_runtime else 0,
"last_messages": _active_runtime.history[-3:] if _active_runtime else [],
}
@app.get("/api/state")
async def get_state(user=Depends(require_auth)):
"""Current memorizer state + history length."""
if not _active_runtime:
return {"status": "no_session"}
return {
"status": "active",
"memorizer": _active_runtime.memorizer.state,
"history_len": len(_active_runtime.history),
}
@app.get("/api/history")
async def get_history(last: int = 10, user=Depends(require_auth)):
"""Recent conversation history."""
if not _active_runtime:
return {"status": "no_session", "messages": []}
return {
"status": "active",
"messages": _active_runtime.history[-last:],
}
@app.get("/api/trace")
async def get_trace(last: int = 30, user=Depends(require_auth)):
"""Recent trace lines from trace.jsonl."""
if not TRACE_FILE.exists():
return {"lines": []}
lines = TRACE_FILE.read_text(encoding="utf-8").strip().split("\n")
parsed = []
for line in lines[-last:]:
try:
parsed.append(json.loads(line))
except json.JSONDecodeError:
pass
return {"lines": parsed}
# Serve index.html explicitly, then static assets
from fastapi.responses import FileResponse
@app.get("/")
async def index():
return FileResponse(STATIC_DIR / "index.html")
@app.get("/callback")
async def callback():
"""OIDC callback — serves the same SPA, JS handles the code exchange."""
return FileResponse(STATIC_DIR / "index.html")
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
if __name__ == "__main__":
import uvicorn
uvicorn.run("agent:app", host="0.0.0.0", port=8000, reload=True)

12
docker-compose.yml Normal file
View File

@ -0,0 +1,12 @@
services:
runtime:
build: .
ports:
- "8000:8000"
volumes:
- ./agent.py:/app/agent.py
- ./static:/app/static
- ./trace.jsonl:/app/trace.jsonl
env_file:
- .env
restart: unless-stopped

25
k8s/cog-ingress.yaml Normal file
View File

@ -0,0 +1,25 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: cog-runtime
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls: "true"
spec:
ingressClassName: traefik
tls:
- secretName: cog-tls
hosts:
- cog.loop42.de
rules:
- host: cog.loop42.de
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: agent-runtime
port:
number: 80

View File

@ -0,0 +1,25 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: zitadel-login
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls: "true"
spec:
ingressClassName: traefik
tls:
- secretName: zitadel-tls
hosts:
- auth.loop42.de
rules:
- host: auth.loop42.de
http:
paths:
- path: /ui/v2/login
pathType: Prefix
backend:
service:
name: zitadel-login
port:
number: 3000

85
k8s/zitadel-values.yaml Normal file
View File

@ -0,0 +1,85 @@
replicaCount: 1
zitadel:
masterkeySecretName: zitadel-masterkey
configmapConfig:
ExternalDomain: auth.loop42.de
ExternalPort: 443
ExternalSecure: true
TLS:
Enabled: false
Database:
Postgres:
Host: zitadel-db-postgresql
Port: 5432
Database: zitadel
MaxOpenConns: 10
MaxIdleConns: 5
MaxConnLifetime: 30m
MaxConnIdleTime: 5m
User:
Username: zitadel
SSL:
Mode: disable
Admin:
Username: postgres
SSL:
Mode: disable
FirstInstance:
Org:
Name: loop42
Human:
UserName: nico
FirstName: Nico
LastName: Zimmermann
NickName: nico
Email:
Address: nico@loop42.de
Verified: true
Password: ChangeMe42!
PasswordChangeRequired: true
secretConfig:
Database:
Postgres:
User:
Password: zitadel-db-pw-42
Admin:
Password: postgres-admin-pw-42
ingress:
enabled: true
className: traefik
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls: "true"
hosts:
- host: auth.loop42.de
paths:
- path: /
pathType: Prefix
tls:
- secretName: zitadel-tls
hosts:
- auth.loop42.de
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 1000m
memory: 512Mi
setupJob:
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 256Mi
postgresql:
enabled: false

8
requirements.txt Normal file
View File

@ -0,0 +1,8 @@
fastapi==0.135.2
uvicorn==0.42.0
httpx==0.28.1
httpx-sse==0.4.3
websockets==16.0
python-dotenv==1.2.2
pydantic==2.12.5
PyJWT[crypto]==2.10.1

229
static/app.js Normal file
View File

@ -0,0 +1,229 @@
const msgs = document.getElementById('messages');
const inputEl = document.getElementById('input');
const statusEl = document.getElementById('status');
const traceEl = document.getElementById('trace');
let ws, currentEl;
let authToken = localStorage.getItem('cog_token');
let authConfig = null;
// --- OIDC Auth ---
async function initAuth() {
try {
const resp = await fetch('/auth/config');
authConfig = await resp.json();
} catch { authConfig = { enabled: false }; }
if (!authConfig.enabled) { connect(); return; }
// Handle OIDC callback
if (location.pathname === '/callback') {
const params = new URLSearchParams(location.search);
const code = params.get('code');
const verifier = sessionStorage.getItem('pkce_verifier');
if (code && verifier) {
const tokenResp = await fetch(authConfig.issuer + '/oauth/v2/token', {
method: 'POST',
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
body: new URLSearchParams({
grant_type: 'authorization_code',
client_id: authConfig.clientId,
code,
redirect_uri: location.origin + '/callback',
code_verifier: verifier,
}),
});
const tokens = await tokenResp.json();
if (tokens.access_token) {
// Store access token for userinfo, id_token for JWT validation
localStorage.setItem('cog_access_token', tokens.access_token);
authToken = tokens.id_token || tokens.access_token;
localStorage.setItem('cog_token', authToken);
sessionStorage.removeItem('pkce_verifier');
}
}
history.replaceState(null, '', '/');
}
if (authToken) {
connect();
} else {
showLogin();
}
}
function showLogin() {
statusEl.textContent = 'not authenticated';
statusEl.style.color = '#f59e0b';
const btn = document.createElement('button');
btn.textContent = 'Log in with loop42';
btn.className = 'login-btn';
btn.onclick = startLogin;
document.getElementById('input-bar').replaceChildren(btn);
}
async function startLogin() {
// PKCE: generate code_verifier + code_challenge
const verifier = randomString(64);
sessionStorage.setItem('pkce_verifier', verifier);
const encoder = new TextEncoder();
const digest = await crypto.subtle.digest('SHA-256', encoder.encode(verifier));
const challenge = btoa(String.fromCharCode(...new Uint8Array(digest)))
.replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, '');
const params = new URLSearchParams({
response_type: 'code',
client_id: authConfig.clientId,
redirect_uri: location.origin + '/callback',
scope: 'openid profile email',
code_challenge: challenge,
code_challenge_method: 'S256',
});
location.href = authConfig.issuer + '/oauth/v2/authorize?' + params;
}
function randomString(len) {
const arr = new Uint8Array(len);
crypto.getRandomValues(arr);
return btoa(String.fromCharCode(...arr)).replace(/[^a-zA-Z0-9]/g, '').slice(0, len);
}
// --- WebSocket ---
function connect() {
const proto = location.protocol === 'https:' ? 'wss:' : 'ws:';
let wsUrl = proto + '//' + location.host + '/ws';
if (authToken) {
const accessToken = localStorage.getItem('cog_access_token') || '';
wsUrl += '?token=' + encodeURIComponent(authToken) + '&access_token=' + encodeURIComponent(accessToken);
}
ws = new WebSocket(wsUrl);
ws.onopen = () => {
statusEl.textContent = 'connected';
statusEl.style.color = '#22c55e';
addTrace('runtime', 'connected', 'ws open');
};
ws.onclose = () => {
statusEl.textContent = 'disconnected';
statusEl.style.color = '#666';
addTrace('runtime', 'disconnected', 'ws closed');
setTimeout(connect, 2000);
};
ws.onmessage = (e) => {
const data = JSON.parse(e.data);
if (data.type === 'hud') {
handleHud(data);
} else if (data.type === 'delta') {
if (!currentEl) {
currentEl = addMsg('assistant', '');
currentEl.classList.add('streaming');
}
currentEl.textContent += data.content;
scroll(msgs);
} else if (data.type === 'done') {
if (currentEl) currentEl.classList.remove('streaming');
currentEl = null;
}
};
}
function handleHud(data) {
const node = data.node || 'unknown';
const event = data.event || '';
if (event === 'context') {
// Expandable: show message count, click to see full context
const count = (data.messages || []).length;
const summary = count + ' msgs: ' + (data.messages || []).map(m =>
m.role[0].toUpperCase() + ':' + truncate(m.content, 30)
).join(' | ');
const detail = (data.messages || []).map((m, i) =>
i + ' [' + m.role + '] ' + m.content
).join('\n');
addTrace(node, 'context', summary, 'context', detail);
} else if (event === 'decided') {
addTrace(node, 'decided', data.instruction, 'instruction');
} else if (event === 'updated' && data.state) {
const pairs = Object.entries(data.state).map(([k, v]) => {
const val = Array.isArray(v) ? v.join(', ') : (v || 'null');
return k + '=' + truncate(val, 25);
}).join(' ');
const detail = JSON.stringify(data.state, null, 2);
addTrace(node, 'state', pairs, 'state', detail);
} else if (event === 'error') {
addTrace(node, 'error', data.detail || '', 'error');
} else if (event === 'thinking') {
addTrace(node, 'thinking', data.detail || '');
} else if (event === 'streaming') {
addTrace(node, 'streaming', '');
} else if (event === 'done') {
addTrace(node, 'done', '');
} else {
// Generic fallback
const detail = JSON.stringify(data, null, 2);
addTrace(node, event, '', '', detail);
}
}
function addTrace(node, event, text, cls, detail) {
const line = document.createElement('div');
line.className = 'trace-line' + (detail ? ' expandable' : '');
const ts = new Date().toLocaleTimeString('de-DE', { hour12: false, hour: '2-digit', minute: '2-digit', second: '2-digit', fractionalSecondDigits: 1 });
line.innerHTML =
'<span class="trace-ts">' + ts + '</span>' +
'<span class="trace-node ' + esc(node) + '">' + esc(node) + '</span>' +
'<span class="trace-event">' + esc(event) + '</span>' +
'<span class="trace-data' + (cls ? ' ' + cls : '') + '">' + esc(text) + '</span>';
traceEl.appendChild(line);
if (detail) {
const detailEl = document.createElement('div');
detailEl.className = 'trace-detail';
detailEl.textContent = detail;
traceEl.appendChild(detailEl);
line.addEventListener('click', () => detailEl.classList.toggle('open'));
}
scroll(traceEl);
}
function scroll(el) { el.scrollTop = el.scrollHeight; }
function esc(s) { const d = document.createElement('span'); d.textContent = s; return d.innerHTML; }
function truncate(s, n) { return s.length > n ? s.slice(0, n) + '\u2026' : s; }
function addMsg(role, text) {
const el = document.createElement('div');
el.className = 'msg ' + role;
el.textContent = text;
msgs.appendChild(el);
scroll(msgs);
return el;
}
function send() {
const text = inputEl.value.trim();
if (!text || !ws || ws.readyState !== 1) return;
addMsg('user', text);
addTrace('runtime', 'user_msg', truncate(text, 60));
ws.send(JSON.stringify({ text }));
inputEl.value = '';
}
inputEl.addEventListener('keydown', (e) => { if (e.key === 'Enter') send(); });
initAuth();

483
static/design.html Normal file
View File

@ -0,0 +1,483 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Cognitive Runtime — Design Exploration</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #d4d4d4; line-height: 1.6; }
.page { max-width: 1400px; margin: 0 auto; padding: 2rem; }
h1 { color: #f59e0b; font-size: 1.6rem; margin-bottom: 0.5rem; }
h2 { color: #60a5fa; font-size: 1.2rem; margin: 2rem 0 0.75rem; border-bottom: 1px solid #222; padding-bottom: 0.3rem; }
h3 { color: #34d399; font-size: 1rem; margin: 1.2rem 0 0.5rem; }
p, li { font-size: 0.9rem; }
ul { padding-left: 1.2rem; margin: 0.3rem 0; }
li { margin: 0.2rem 0; }
.subtitle { color: #888; font-size: 0.85rem; margin-bottom: 2rem; }
code { background: #1a1a2e; padding: 0.1rem 0.4rem; border-radius: 0.2rem; font-size: 0.85rem; color: #a78bfa; }
/* Three-column sections */
.three-col { display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 1rem; margin: 1rem 0; }
.card { background: #111; border: 1px solid #222; border-radius: 0.5rem; padding: 1rem; }
.card h3 { margin-top: 0; }
.card.amber { border-color: #f59e0b33; }
.card.blue { border-color: #60a5fa33; }
.card.green { border-color: #34d39933; }
.card.purple { border-color: #a78bfa33; }
.card.red { border-color: #ef444433; }
/* Graph visualization */
.graph { background: #0f0f1a; border: 1px solid #222; border-radius: 0.5rem; padding: 1.5rem; margin: 1rem 0; font-family: monospace; font-size: 0.8rem; white-space: pre; line-height: 1.4; overflow-x: auto; }
.graph .sensor { color: #f59e0b; }
.graph .controller { color: #60a5fa; }
.graph .effector { color: #34d399; }
.graph .memory { color: #a78bfa; }
.graph .feedback { color: #f472b6; }
.graph .arrow { color: #555; }
/* Test cases table */
table { width: 100%; border-collapse: collapse; margin: 1rem 0; font-size: 0.85rem; }
th { text-align: left; padding: 0.5rem; background: #1a1a2e; color: #60a5fa; border-bottom: 2px solid #333; }
td { padding: 0.5rem; border-bottom: 1px solid #1a1a1a; }
tr:hover td { background: #111; }
.tag { display: inline-block; padding: 0.1rem 0.4rem; border-radius: 0.2rem; font-size: 0.7rem; font-weight: 600; }
.tag.now { background: #22c55e22; color: #22c55e; }
.tag.next { background: #f59e0b22; color: #f59e0b; }
.tag.later { background: #60a5fa22; color: #60a5fa; }
.tag.node { background: #a78bfa22; color: #a78bfa; }
/* Roadmap */
.phase { display: flex; gap: 1rem; align-items: flex-start; margin: 0.75rem 0; padding: 0.75rem; background: #111; border-radius: 0.5rem; border-left: 3px solid #333; }
.phase.active { border-left-color: #22c55e; }
.phase.planned { border-left-color: #f59e0b; }
.phase.future { border-left-color: #60a5fa; }
.phase-num { font-size: 1.5rem; font-weight: 700; color: #333; min-width: 2rem; }
.phase.active .phase-num { color: #22c55e; }
.phase.planned .phase-num { color: #f59e0b; }
.phase.future .phase-num { color: #60a5fa; }
</style>
</head>
<body>
<div class="page">
<h1>Cognitive Agent Runtime — Design Exploration</h1>
<div class="subtitle">Node graph architecture grounded in cybernetics, Cynefin, actor-network theory, and signal processing</div>
<!-- ================================================================== -->
<h2>Theoretical Grounding</h2>
<div class="three-col">
<div class="card amber">
<h3>Cybernetics (Wiener, Ashby, Beer)</h3>
<p><strong>Core idea:</strong> systems that regulate themselves through feedback loops.</p>
<ul>
<li><strong>Ashby's Law of Requisite Variety</strong> — the controller must have at least as much variety as the disturbance. One monolithic agent fails because it can't match the variety of all inputs. Specialized nodes CAN.</li>
<li><strong>Viable System Model (Beer)</strong> — every viable system has 5 subsystems: Operations (Output), Coordination (Router), Control (Input), Intelligence (Thinker), Policy (human/config). Our graph maps directly.</li>
<li><strong>Homeostasis</strong> — the system maintains stability through feedback. The I/O Feedback node IS the homeostatic loop — detecting drift, repeated failures, frustration.</li>
<li><strong>Circular causality</strong> — output affects input. The user's next message is shaped by the response. The graph must be a loop, not a pipeline.</li>
</ul>
<p style="margin-top:0.5rem;color:#f59e0b;font-size:0.8rem;"><strong>Design takeaway:</strong> Every node is either a sensor, controller, or effector. Missing any breaks the feedback loop.</p>
</div>
<div class="card blue">
<h3>Cynefin (Snowden)</h3>
<p><strong>Core idea:</strong> different problem domains need different response strategies.</p>
<ul>
<li><strong>Clear</strong> — "what time is it?" → sense-categorize-respond. Input routes directly to Output, no Thinker needed. Fast.</li>
<li><strong>Complicated</strong> — "how do I deploy to K3s?" → sense-analyze-respond. Input routes to Thinker with tools. Expert knowledge.</li>
<li><strong>Complex</strong> — "should we use microservices?" → probe-sense-respond. Thinker explores, Memorizer tracks evolving understanding. No single right answer.</li>
<li><strong>Chaotic</strong> — system is down, user panicking → act-sense-respond. Output responds FIRST (acknowledge), then Input figures out what happened.</li>
<li><strong>Confused</strong> — unclear what domain we're in → Input's primary job! Classify before routing.</li>
</ul>
<p style="margin-top:0.5rem;color:#60a5fa;font-size:0.8rem;"><strong>Design takeaway:</strong> Input node IS the Cynefin classifier. Different domains = different graph paths.</p>
</div>
<div class="card green">
<h3>Actor-Network Theory (Latour)</h3>
<p><strong>Core idea:</strong> capability emerges from the network, not individual actors.</p>
<ul>
<li><strong>Actants</strong> — both human and non-human entities have agency. Each node is an actant. The user is an actant. The LLM API is an actant.</li>
<li><strong>Translation</strong> — messages change form as they pass through the network. User text → envelope → command → LLM prompt → stream → display. Each node translates.</li>
<li><strong>Irreducibility</strong> — you can't reduce the system to one actor. No single node "is" the agent. The GRAPH is the agent.</li>
<li><strong>Enrollment</strong> — new nodes join the network as needed. Tool nodes, sensor nodes, human-in-the-loop nodes. The graph grows.</li>
</ul>
<p style="margin-top:0.5rem;color:#34d399;font-size:0.8rem;"><strong>Design takeaway:</strong> The graph IS the intelligence. Nodes are replaceable. Edges are where meaning happens.</p>
</div>
</div>
<!-- ================================================================== -->
<h2>Node Taxonomy (from signal processing + cybernetics)</h2>
<div class="three-col">
<div class="card amber">
<h3>Sensors (perceive)</h3>
<ul>
<li><strong>Input</strong> — user message sensor</li>
<li><strong>Timer</strong> — periodic trigger (cron, polling)</li>
<li><strong>Webhook</strong> — external event sensor</li>
<li><strong>FileWatch</strong> — filesystem change sensor</li>
<li><strong>SystemProbe</strong> — health/load sensor</li>
</ul>
<p style="color:#888;font-size:0.75rem;margin-top:0.5rem;">Like a webcam node in TouchDesigner. Always on, emits when something happens.</p>
</div>
<div class="card blue">
<h3>Controllers (decide + transform)</h3>
<ul>
<li><strong>Classifier</strong> — categorize input (Cynefin domain, intent, tone)</li>
<li><strong>Router</strong> — direct to different paths based on classification</li>
<li><strong>Thinker</strong> — deep reasoning, tool use</li>
<li><strong>Filter</strong> — reduce, summarize, extract</li>
<li><strong>Accumulator</strong> — aggregate over time (topic tracker)</li>
</ul>
<p style="color:#888;font-size:0.75rem;margin-top:0.5rem;">Like filter/transform nodes in Max/MSP. Shape the signal.</p>
</div>
<div class="card green">
<h3>Effectors (act)</h3>
<ul>
<li><strong>Output</strong> — stream text to user</li>
<li><strong>Feedback</strong> — emit HUD/status events</li>
<li><strong>ToolExec</strong> — execute external tools</li>
<li><strong>Writer</strong> — persist to storage</li>
<li><strong>Notifier</strong> — push to external systems</li>
</ul>
<p style="color:#888;font-size:0.75rem;margin-top:0.5rem;">Like output nodes in Unreal Blueprints. Make something happen in the world.</p>
</div>
</div>
<div class="card purple" style="margin:1rem 0;">
<h3>Memory (special: both reads and writes)</h3>
<ul>
<li><strong>Memorizer</strong> — working memory, session history, user facts (SQLite/JSON)</li>
<li><strong>TopicTracker</strong> — maintains list of active conversation topics</li>
<li><strong>ContextBuilder</strong> — assembles the right context for each node on demand</li>
</ul>
<p style="color:#888;font-size:0.75rem;margin-top:0.5rem;">Memory nodes are unique: they're called BY other nodes, not just wired in sequence. They're shared state — the "blackboard" in classic AI.</p>
</div>
<!-- ================================================================== -->
<h2>Graph Architecture — Full Vision</h2>
<div class="graph">
<span class="arrow"> ┌─────────────────────────────────────────────────┐</span>
<span class="arrow"> │ │</span>
<span class="arrow"> ▼ │</span>
<span class="sensor"> [User WS] ──► [Input/Classifier]</span><span class="arrow"> ──┬──────────────────────────┐ │</span>
<span class="arrow"> │ │ │ │</span>
<span class="arrow"> │ ┌─────────┘ │ │</span>
<span class="arrow"> │ │ </span><span class="feedback">Cynefin routing</span><span class="arrow"> │ │</span>
<span class="arrow"> │ │ │ │</span>
<span class="arrow"> </span><span class="feedback">Clear:</span><span class="arrow"> │ │ </span><span class="controller">Complicated/Complex:</span><span class="arrow"> │ │</span>
<span class="arrow"> (skip │ │ ▼ │</span>
<span class="arrow"> thinker) │ ▼ </span><span class="memory">[Memorizer]</span><span class="arrow"></span>
<span class="arrow"></span><span class="controller">[Thinker]</span><span class="arrow"> ◄── context ──────── </span><span class="memory"></span><span class="arrow"></span>
<span class="arrow"> │ │ │ </span><span class="memory"></span><span class="arrow"></span>
<span class="arrow"> │ │ └──── memory updates ──────── </span><span class="memory"></span><span class="arrow"></span>
<span class="arrow"> │ │ │ │ │</span>
<span class="arrow"> │ │ └──► </span><span class="effector">[ToolExec]</span><span class="arrow"> ─── results ──► │ │</span>
<span class="arrow"> │ │ │</span>
<span class="arrow"> ▼ ▼ │</span>
<span class="arrow"> </span><span class="effector">[Output]</span><span class="arrow"> ──► [User WS] (stream delta/done) │</span>
<span class="arrow"> │ │</span>
<span class="arrow"> └──► </span><span class="feedback">[Feedback]</span><span class="arrow"> ──► [User WS] (hud events) │</span>
<span class="arrow"> │ │</span>
<span class="arrow"> └──► </span><span class="feedback">[I/O Monitor]</span><span class="arrow"> ── hints ──────────┘</span>
<span class="arrow"></span>
<span class="sensor"> [Timer] ──────────────────────────────►│</span>
<span class="sensor"> [Webhook] ────────────────────────────►│</span>
<span class="sensor"> [SystemProbe] ────────────────────────►│</span>
<span class="arrow"></span>
<span class="arrow"></span>
<span class="arrow"> </span><span class="memory">[TopicTracker]</span><span class="arrow"> ──► [UI: topic list, action buttons]</span>
</div>
<!-- ================================================================== -->
<h2>Node-Based Programming Analogy</h2>
<div class="three-col">
<div class="card">
<h3>TouchDesigner / Max/MSP</h3>
<ul>
<li>Webcam → filter → skeleton detector → output display</li>
<li><strong>Always running</strong> — not request/response</li>
<li>Nodes have typed inputs/outputs</li>
<li>Graph is the program</li>
</ul>
</div>
<div class="card">
<h3>ComfyUI (Stable Diffusion)</h3>
<ul>
<li>Prompt → CLIP → sampler → VAE → image</li>
<li>Each node: one model, one job</li>
<li>Swap nodes to change behavior</li>
<li>Visual graph = full transparency</li>
</ul>
</div>
<div class="card">
<h3>Our Cognitive Runtime</h3>
<ul>
<li>User msg → classify → think → stream → display</li>
<li>Each node: one LLM (or none), one job</li>
<li>Swap models per node via config</li>
<li><strong>Three-column view = our visual debugger</strong></li>
</ul>
</div>
</div>
<p style="color:#f59e0b;margin:1rem 0;">Key insight: like node-based visual programming, the graph runs <strong>continuously</strong>. Sensors fire, signals propagate, effectors act. The chat is just ONE sensor. Timer events, webhooks, system probes — all feed the same graph.</p>
<!-- ================================================================== -->
<h2>10 Test Use Cases</h2>
<table>
<tr><th>#</th><th>Use Case</th><th>Tests</th><th>Nodes Needed</th><th>Phase</th></tr>
<tr>
<td>1</td>
<td><strong>Greeting</strong> — "hey!"</td>
<td>Input classifies casual, Output responds warmly. Verify command + context visible in panels.</td>
<td><span class="tag node">Input</span> <span class="tag node">Output</span></td>
<td><span class="tag now">NOW</span></td>
</tr>
<tr>
<td>2</td>
<td><strong>Technical question</strong> — "how does asyncio.Queue work?"</td>
<td>Input classifies knowledge-needed. Output gives detailed answer. Context panel shows history growth.</td>
<td><span class="tag node">Input</span> <span class="tag node">Output</span></td>
<td><span class="tag now">NOW</span></td>
</tr>
<tr>
<td>3</td>
<td><strong>Multi-turn follow-up</strong> — ask, then "tell me more"</td>
<td>Input sees follow-up pattern. Output uses history for continuity. Watch context grow in both panels.</td>
<td><span class="tag node">Input</span> <span class="tag node">Output</span></td>
<td><span class="tag now">NOW</span></td>
</tr>
<tr>
<td>4</td>
<td><strong>Tone shift</strong> — friendly then frustrated "this is broken!"</td>
<td>Input detects tone change, adjusts command. Output shifts from casual to empathetic/helpful.</td>
<td><span class="tag node">Input</span> <span class="tag node">Output</span></td>
<td><span class="tag now">NOW</span></td>
</tr>
<tr>
<td>5</td>
<td><strong>Memory persistence</strong> — "my name is Nico" ... later ... "what's my name?"</td>
<td>Memorizer stores user fact. On later question, provides context to Thinker. Output answers correctly.</td>
<td><span class="tag node">Input</span> <span class="tag node">Memorizer</span> <span class="tag node">Output</span></td>
<td><span class="tag next">NEXT</span></td>
</tr>
<tr>
<td>6</td>
<td><strong>Background monitoring</strong> — "watch CPU load, alert if &gt;80%"</td>
<td>Timer/SystemProbe sensor fires periodically. Input classifies as monitoring. Feedback emits to UI without chat message.</td>
<td><span class="tag node">Timer</span> <span class="tag node">SystemProbe</span> <span class="tag node">Feedback</span></td>
<td><span class="tag later">LATER</span></td>
</tr>
<tr>
<td>7</td>
<td><strong>System unresponsive</strong> — LLM takes 30s+</td>
<td>Feedback node shows "thinking..." immediately. Timeout handling. User sees activity, not silence.</td>
<td><span class="tag node">Input</span> <span class="tag node">Feedback</span> <span class="tag node">Output</span></td>
<td><span class="tag next">NEXT</span></td>
</tr>
<tr>
<td>8</td>
<td><strong>Cynefin: Clear domain</strong> — "what's 2+2?"</td>
<td>Input classifies as Clear, skips Thinker, routes directly to Output. Faster response, cheaper.</td>
<td><span class="tag node">Input</span> <span class="tag node">Router</span> <span class="tag node">Output</span></td>
<td><span class="tag later">LATER</span></td>
</tr>
<tr>
<td>9</td>
<td><strong>Topic tracking</strong> — conversation covers 3 topics, user says "back to the first thing"</td>
<td>TopicTracker accumulates topics. UI shows topic list. User can click to refocus. Memorizer provides relevant context.</td>
<td><span class="tag node">TopicTracker</span> <span class="tag node">Memorizer</span> <span class="tag node">UI</span></td>
<td><span class="tag later">LATER</span></td>
</tr>
<tr>
<td>10</td>
<td><strong>UI action</strong> — button in top bar triggers "summarize this conversation"</td>
<td>Non-chat input (button click) enters the graph as a sensor event. Input classifies, routes to Thinker+Memorizer. Output renders summary.</td>
<td><span class="tag node">UI Sensor</span> <span class="tag node">Thinker</span> <span class="tag node">Memorizer</span></td>
<td><span class="tag later">LATER</span></td>
</tr>
</table>
<!-- ================================================================== -->
<h2>Build Roadmap — What To Validate</h2>
<div class="phase active">
<div class="phase-num">1</div>
<div>
<h3 style="color:#22c55e;margin:0;">Input + Output (NOW — we're here)</h3>
<p><strong>Validates:</strong> Two-node communication works. LLM-to-LLM command passing. Context isolation visible. Streaming through graph.</p>
<p><strong>Test cases:</strong> #1-4 (greeting, technical, follow-up, tone shift)</p>
<p><strong>Success = </strong> You can see Input's reasoning and Output's execution as separate contexts. The command is meaningful, not just pass-through.</p>
</div>
</div>
<div class="phase planned">
<div class="phase-num">2</div>
<div>
<h3 style="color:#f59e0b;margin:0;">+ Feedback Node (pure Python, no LLM)</h3>
<p><strong>Validates:</strong> Non-LLM nodes in the graph. Real-time status. Parallel event emission (HUD fires while Output streams).</p>
<p><strong>Test cases:</strong> #7 (unresponsive system — user sees "thinking..." not silence)</p>
<p><strong>Success = </strong> User always sees activity within 200ms. Feedback panel in the UI shows event timeline.</p>
</div>
</div>
<div class="phase planned">
<div class="phase-num">3</div>
<div>
<h3 style="color:#f59e0b;margin:0;">+ Memorizer Node (LLM: Gemini Flash + SQLite)</h3>
<p><strong>Validates:</strong> Persistent state across the graph. Node-to-node context requests (Thinker asks Memorizer for context). Three LLM calls per turn, each with different context.</p>
<p><strong>Test cases:</strong> #5 (memory persistence — "what's my name?")</p>
<p><strong>Success = </strong> Conversation survives page reload. Memorizer panel shows what it stores vs what it provides.</p>
</div>
</div>
<div class="phase future">
<div class="phase-num">4</div>
<div>
<h3 style="color:#60a5fa;margin:0;">+ Timer Sensor + SystemProbe (always-on)</h3>
<p><strong>Validates:</strong> The graph runs without user input. Sensor-driven processing. Background monitoring. The system is ALIVE, not just reactive.</p>
<p><strong>Test cases:</strong> #6 (background CPU monitoring)</p>
<p><strong>Success = </strong> UI shows activity without chat. Alerts appear. The graph is a living system, not a request/response pipe.</p>
</div>
</div>
<div class="phase future">
<div class="phase-num">5</div>
<div>
<h3 style="color:#60a5fa;margin:0;">+ Router + Cynefin Classification</h3>
<p><strong>Validates:</strong> Multi-path graph. Input classifies domain, routes differently. Clear = fast path (skip Thinker). Complex = deep path (Thinker + tools + Memorizer). The graph adapts to the problem.</p>
<p><strong>Test cases:</strong> #8 (Clear domain — fast path)</p>
<p><strong>Success = </strong> Simple questions are 3x faster. Complex questions get deeper treatment. Visible in the graph view.</p>
</div>
</div>
<div class="phase future">
<div class="phase-num">6</div>
<div>
<h3 style="color:#60a5fa;margin:0;">+ UI Extensions (topic list, action buttons, productivity)</h3>
<p><strong>Validates:</strong> The graph doesn't just do chat. Non-chat inputs (buttons, lists) enter the graph. Non-chat outputs (topic sidebar, action bar) exit the graph. Full cybernetic loop with rich UI.</p>
<p><strong>Test cases:</strong> #9 (topic tracking), #10 (UI button triggers graph)</p>
<p><strong>Success = </strong> The agent is a workspace tool, not just a chatbot.</p>
</div>
</div>
<!-- ================================================================== -->
<h2>Viable System Model (Beer) — The Graph as Organism</h2>
<div class="three-col">
<div class="card green">
<h3>System 1 — Operations</h3>
<p>The worker nodes doing actual work.</p>
<ul>
<li><strong>Thinker</strong> — reasoning, tool calls</li>
<li><strong>Output</strong> — response generation</li>
<li><strong>ToolExec</strong> — external actions</li>
</ul>
</div>
<div class="card blue">
<h3>System 2 — Coordination</h3>
<p>Prevents conflicts between System 1 nodes.</p>
<ul>
<li><strong>Router</strong> — sequencing, dedup</li>
<li><strong>ContextBuilder</strong> — shared context</li>
<li>Message queue ordering</li>
</ul>
</div>
<div class="card amber">
<h3>System 3 — Control</h3>
<p>Monitors performance, enforces policies.</p>
<ul>
<li><strong>Feedback</strong> — quality gates</li>
<li>Token budget manager</li>
<li>Rate limiter, safety filter</li>
</ul>
</div>
</div>
<div class="three-col">
<div class="card purple">
<h3>System 4 — Intelligence</h3>
<p>Looks outward + forward. Adapts.</p>
<ul>
<li><strong>I/O Monitor</strong> — pattern detection</li>
<li>Learns from failures</li>
<li>Adapts routing rules over time</li>
</ul>
</div>
<div class="card red">
<h3>System 5 — Identity</h3>
<p>What the graph IS and won't do.</p>
<ul>
<li>System prompts, persona</li>
<li>Safety boundaries</li>
<li><code>graph.md</code> config</li>
</ul>
</div>
<div class="card" style="border-color:#55555533;">
<h3>The Human Node</h3>
<p>User isn't outside the system — they're an actant (ANT).</p>
<ul>
<li>Messages = sensor signals</li>
<li>Approvals = gate controls</li>
<li>Corrections = feedback loops</li>
<li>The graph includes the human</li>
</ul>
</div>
</div>
<!-- ================================================================== -->
<h2>Design Principles (Synthesis)</h2>
<div class="three-col">
<div class="card amber">
<h3>1. Three-Layer Architecture</h3>
<p><em>From cybernetics</em></p>
<p>Perception (sensors, classifiers) → Decision (routers, controllers) → Action (processors, effectors). Feedback from action back to perception. Missing any layer breaks the loop.</p>
</div>
<div class="card blue">
<h3>2. Invest in Translations</h3>
<p><em>From ANT</em></p>
<p>Message schemas and inter-node protocols matter MORE than individual node intelligence. A mediocre LLM with excellent routing outperforms a brilliant LLM with bad routing. Capability is emergent from topology.</p>
</div>
<div class="card green">
<h3>3. Mixed Compute</h3>
<p><em>From signal processing</em></p>
<p>Only Processor nodes need LLMs. Classifiers, routers, filters, accumulators can be lightweight models or pure code. Keeps cost and latency sane. Reserve expensive calls for where reasoning matters.</p>
</div>
</div>
<div class="three-col">
<div class="card purple">
<h3>4. Typed Event-Driven Connections</h3>
<p><em>From Max/MSP, Blueprints</em></p>
<p>Distinguish <strong>trigger</strong> inputs (fire processing) from <strong>context</strong> inputs (available but passive). Type the message wires. Dampen every feedback cycle (max iterations, circuit breakers).</p>
</div>
<div class="card red">
<h3>5. Requisite Variety</h3>
<p><em>From Ashby</em></p>
<p>Classifier/router layer must distinguish at least as many input types as you have processing strategies. Under-classification = wasted capability. Over-classification = premature complexity.</p>
</div>
<div class="card" style="border-color:#55555533;">
<h3>6. Domain-Aware Routing</h3>
<p><em>From Cynefin</em></p>
<p>Clear = shallow/fast (skip Thinker). Complicated = specialist path. Complex = parallel probes. Chaotic = hardcoded fallback, act first. Different domains = different graph depths.</p>
</div>
</div>
<!-- ================================================================== -->
<h2>The Key Insight</h2>
<div class="card amber" style="margin:1rem 0;border-width:2px;">
<p style="font-size:1rem;color:#f59e0b;font-weight:600;">The graph IS the agent. Not a single LLM with a prompt. Not a chain of API calls. A living, always-on, multi-model network of specialized processors — exactly like a signal processing graph, but for language and reasoning.</p>
<p style="margin-top:0.5rem;">Each phase adds a node. Each node validates one architectural claim. If any claim fails, we learn something. If they all hold, we have a new kind of agent runtime.</p>
<p style="margin-top:0.5rem;color:#888;">The network is the capability (ANT). The variety must match the disturbance (Ashby). The domain determines the strategy (Cynefin). The organism needs all five systems to be viable (Beer).</p>
</div>
</div>
</body>
</html>

33
static/index.html Normal file
View File

@ -0,0 +1,33 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Cognitive Agent Runtime</title>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<div id="top-bar">
<h1>Cognitive Agent Runtime</h1>
<div id="status">disconnected</div>
</div>
<div id="main">
<div class="panel chat-panel">
<div class="panel-header chat-h">Chat</div>
<div id="messages"></div>
<div id="input-bar">
<input id="input" placeholder="Type a message..." autocomplete="off">
<button onclick="send()">Send</button>
</div>
</div>
<div class="panel">
<div class="panel-header trace-h">Trace</div>
<div id="trace"></div>
</div>
</div>
<script src="/static/app.js"></script>
</body>
</html>

56
static/style.css Normal file
View File

@ -0,0 +1,56 @@
/* Global reset: border-box sizing, no default margins/padding. */
* { margin: 0; padding: 0; box-sizing: border-box; }
/* Full-viewport dark layout: top bar stacked above the main grid. */
body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0; height: 100vh; display: flex; flex-direction: column; }
/* Top bar */
#top-bar { display: flex; align-items: center; gap: 1rem; padding: 0.4rem 1rem; background: #111; border-bottom: 1px solid #222; }
#top-bar h1 { font-size: 0.85rem; font-weight: 600; color: #888; }
#status { font-size: 0.75rem; color: #666; }
/* Two-column layout: chat 1/3 | trace 2/3 */
/* The 1px gap over a #222 background draws the divider between panels. */
#main { flex: 1; display: grid; grid-template-columns: 1fr 2fr; gap: 1px; background: #222; overflow: hidden; min-height: 0; }
.panel { background: #0a0a0a; display: flex; flex-direction: column; overflow: hidden; }
.panel-header { padding: 0.5rem 0.75rem; font-size: 0.75rem; font-weight: 700; text-transform: uppercase; letter-spacing: 0.05em; border-bottom: 1px solid #222; flex-shrink: 0; }
.panel-header.chat-h { color: #60a5fa; background: #0a1628; }
.panel-header.trace-h { color: #a78bfa; background: #120a1e; }
/* Chat panel */
.chat-panel { display: flex; flex-direction: column; }
#messages { flex: 1; overflow-y: auto; padding: 0.5rem; display: flex; flex-direction: column; gap: 0.4rem; }
.msg { max-width: 90%; padding: 0.5rem 0.75rem; border-radius: 0.6rem; line-height: 1.4; white-space: pre-wrap; font-size: 0.9rem; }
.msg.user { align-self: flex-end; background: #2563eb; color: white; }
.msg.assistant { align-self: flex-start; background: #1e1e1e; border: 1px solid #333; }
/* Blue border marks the bubble currently receiving streamed deltas. */
.msg.assistant.streaming { border-color: #2563eb; }
/* Input bar */
#input-bar { display: flex; gap: 0.5rem; padding: 0.75rem; background: #111; border-top: 1px solid #222; }
#input { flex: 1; padding: 0.5rem 0.75rem; background: #1a1a1a; color: #e0e0e0; border: 1px solid #333; border-radius: 0.4rem; font-size: 0.9rem; outline: none; }
#input:focus { border-color: #2563eb; }
button { padding: 0.5rem 1rem; background: #2563eb; color: white; border: none; border-radius: 0.4rem; cursor: pointer; font-size: 0.9rem; }
button:hover { background: #1d4ed8; }
/* Trace panel */
#trace { flex: 1; overflow-y: auto; padding: 0.5rem; font-family: 'JetBrains Mono', 'Cascadia Code', 'Fira Code', monospace; font-size: 0.72rem; line-height: 1.5; }
.trace-line { padding: 0.15rem 0.4rem; border-bottom: 1px solid #111; display: flex; gap: 0.5rem; align-items: baseline; }
.trace-line:hover { background: #1a1a2e; }
.trace-ts { color: #555; flex-shrink: 0; min-width: 5rem; }
.trace-node { font-weight: 700; flex-shrink: 0; min-width: 6rem; }
/* Per-node colors; class name is the node name emitted by the runtime. */
.trace-node.input { color: #f59e0b; }
.trace-node.output { color: #34d399; }
.trace-node.memorizer { color: #c084fc; }
.trace-node.runtime { color: #60a5fa; }
.trace-event { color: #888; flex-shrink: 0; min-width: 6rem; }
.trace-data { color: #ccc; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
.trace-data.instruction { color: #22c55e; }
.trace-data.error { color: #ef4444; }
.trace-data.state { color: #c084fc; }
.trace-data.context { color: #666; }
/* Expandable trace detail (hidden until its trace-line is clicked). */
.trace-line.expandable { cursor: pointer; }
.trace-detail { display: none; padding: 0.3rem 0.4rem 0.3rem 12rem; font-size: 0.65rem; color: #777; white-space: pre-wrap; word-break: break-all; max-height: 10rem; overflow-y: auto; background: #0d0d14; border-bottom: 1px solid #1a1a2e; }
.trace-detail.open { display: block; }