cognitive agent runtime v0.4.6: 3-node graph + Zitadel auth + K3s deploy

- Input/Output/Memorizer nodes with OpenRouter (Gemini Flash)
- Zitadel OIDC auth with PKCE flow, service token for Titan
- SSE event stream + poll endpoint for external observers
- Identity from Zitadel userinfo, listener context in Input prompt
- Trace logging to file + SSE broadcast
- K3s deployment on IONOS with Let's Encrypt TLS
- Frontend: chat + trace view, OIDC login

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Nico 2026-03-27 23:21:51 +01:00
commit 569a6022fe
13 changed files with 1574 additions and 0 deletions

5
.dockerignore Normal file
View File

@ -0,0 +1,5 @@
.venv/
__pycache__/
*.pyc
.env
trace.jsonl

5
.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
.venv/
__pycache__/
*.pyc
.env
trace.jsonl

12
Dockerfile Normal file
View File

@ -0,0 +1,12 @@
FROM python:3.12-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 8000
CMD ["uvicorn", "agent:app", "--host", "0.0.0.0", "--port", "8000"]

596
agent.py Normal file
View File

@ -0,0 +1,596 @@
"""
Cognitive Agent Runtime Phase A.2: Three-node graph (Input, Output + Memorizer).
Input decides WHAT to do. Output executes and streams.
Memorizer holds shared state (S2 coordination).
"""
import asyncio
import json
import os
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
import httpx
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Depends, HTTPException, Query
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from fastapi.staticfiles import StaticFiles
from dotenv import load_dotenv
load_dotenv(Path(__file__).parent / ".env")
# --- Config ---
API_KEY = os.environ["OPENROUTER_API_KEY"]
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
# --- Auth (Zitadel OIDC) ---
ZITADEL_ISSUER = os.environ.get("ZITADEL_ISSUER", "https://auth.loop42.de")
ZITADEL_CLIENT_ID = os.environ.get("ZITADEL_CLIENT_ID", "365996029172056091")
ZITADEL_PROJECT_ID = os.environ.get("ZITADEL_PROJECT_ID", "365995955654230043")
AUTH_ENABLED = os.environ.get("AUTH_ENABLED", "false").lower() == "true"
SERVICE_TOKENS = set(filter(None, os.environ.get("SERVICE_TOKENS", "").split(",")))
_jwks_cache: dict = {"keys": [], "fetched_at": 0}
async def _get_jwks():
if time.time() - _jwks_cache["fetched_at"] < 3600:
return _jwks_cache["keys"]
async with httpx.AsyncClient() as client:
resp = await client.get(f"{ZITADEL_ISSUER}/oauth/v2/keys")
_jwks_cache["keys"] = resp.json()["keys"]
_jwks_cache["fetched_at"] = time.time()
return _jwks_cache["keys"]
async def _validate_token(token: str) -> dict:
"""Validate token: check service tokens, then JWT, then introspection."""
import base64
# Check static service tokens (for machine accounts like titan)
if token in SERVICE_TOKENS:
return {"sub": "titan", "username": "titan", "source": "service_token"}
# Try JWT validation first
try:
parts = token.split(".")
if len(parts) == 3:
keys = await _get_jwks()
header_b64 = parts[0] + "=" * (4 - len(parts[0]) % 4)
header = json.loads(base64.urlsafe_b64decode(header_b64))
kid = header.get("kid")
key = next((k for k in keys if k["kid"] == kid), None)
if key:
import jwt as pyjwt
from jwt import PyJWK
jwk_obj = PyJWK(key)
claims = pyjwt.decode(
token, jwk_obj.key, algorithms=["RS256"],
issuer=ZITADEL_ISSUER, options={"verify_aud": False},
)
return claims
except Exception:
pass
# Fall back to introspection (for opaque access tokens)
# Zitadel requires client_id + client_secret or JWT profile for introspection
# For a public SPA client, use the project's API app instead
# Simplest: check via userinfo endpoint with the token
async with httpx.AsyncClient() as client:
resp = await client.get(
f"{ZITADEL_ISSUER}/oidc/v1/userinfo",
headers={"Authorization": f"Bearer {token}"},
)
if resp.status_code == 200:
info = resp.json()
log.info(f"[auth] userinfo response: {info}")
return {"sub": info.get("sub"), "preferred_username": info.get("preferred_username"),
"email": info.get("email"), "name": info.get("name"), "source": "userinfo"}
raise HTTPException(status_code=401, detail="Invalid token")
_bearer = HTTPBearer(auto_error=False)
async def require_auth(credentials: HTTPAuthorizationCredentials | None = Depends(_bearer)):
"""Dependency: require valid JWT when AUTH_ENABLED."""
if not AUTH_ENABLED:
return {"sub": "anonymous"}
if not credentials:
raise HTTPException(status_code=401, detail="Missing token")
return await _validate_token(credentials.credentials)
async def ws_auth(token: str | None = Query(None)) -> dict:
"""Validate WebSocket token from query param."""
if not AUTH_ENABLED:
return {"sub": "anonymous"}
if not token:
return None # Will reject in ws_endpoint
return await _validate_token(token)
# --- LLM helper ---
import logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message)s", datefmt="%H:%M:%S")
log = logging.getLogger("runtime")
async def llm_call(model: str, messages: list[dict], stream: bool = False) -> Any:
"""Single LLM call via OpenRouter. Returns full text or (client, response) for streaming."""
headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
body = {"model": model, "messages": messages, "stream": stream}
client = httpx.AsyncClient(timeout=60)
if stream:
resp = await client.send(client.build_request("POST", OPENROUTER_URL, headers=headers, json=body), stream=True)
return client, resp # caller owns cleanup
resp = await client.post(OPENROUTER_URL, headers=headers, json=body)
await client.aclose()
data = resp.json()
if "choices" not in data:
log.error(f"LLM error: {data}")
return f"[LLM error: {data.get('error', {}).get('message', 'unknown')}]"
return data["choices"][0]["message"]["content"]
# --- Message types ---
@dataclass
class Envelope:
"""What flows between nodes."""
text: str
user_id: str = "anon"
session_id: str = ""
timestamp: str = ""
@dataclass
class Command:
"""Input node's decision — tells Output what to do."""
instruction: str # natural language command for Output LLM
source_text: str # original user message (Output may need it)
metadata: dict = field(default_factory=dict)
# --- Base Node ---
class Node:
name: str = "node"
model: str | None = None
def __init__(self, send_hud):
self.send_hud = send_hud # async callable to emit hud events to frontend
async def hud(self, event: str, **data):
await self.send_hud({"node": self.name, "event": event, **data})
# --- Input Node ---
class InputNode(Node):
name = "input"
model = "google/gemini-2.0-flash-001"
SYSTEM = """You are the Input node — the ear of this cognitive runtime.
Listener context:
- Authenticated user: {identity}
- Channel: {channel} (Chrome browser on Nico's Windows PC, in his room at home)
- Physical: private space, Nico lives with Tina she may use this session too
- Security: single-user account, shared physical space other voices are trusted household
You hear what comes through this channel. Emit ONE instruction sentence telling Output how to respond.
No content, just the command.
{memory_context}"""
async def process(self, envelope: Envelope, history: list[dict], memory_context: str = "",
identity: str = "unknown", channel: str = "unknown") -> Command:
await self.hud("thinking", detail="deciding how to respond")
log.info(f"[input] user said: {envelope.text}")
messages = [
{"role": "system", "content": self.SYSTEM.format(
memory_context=memory_context, identity=identity, channel=channel)},
]
# History already includes current user message — don't add it again
for msg in history[-8:]:
messages.append(msg)
await self.hud("context", messages=messages)
instruction = await llm_call(self.model, messages)
log.info(f"[input] → command: {instruction}")
await self.hud("decided", instruction=instruction)
return Command(instruction=instruction, source_text=envelope.text)
# --- Output Node ---
class OutputNode(Node):
name = "output"
model = "google/gemini-2.0-flash-001"
SYSTEM = """You are the Output node of a cognitive agent runtime.
You receive a command from the Input node telling you HOW to respond, plus the user's original message.
Follow the command's tone and intent. Be natural, don't mention the command or the runtime architecture.
Be concise.
{memory_context}"""
async def process(self, command: Command, history: list[dict], ws: WebSocket, memory_context: str = "") -> str:
await self.hud("streaming")
messages = [
{"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
]
# Conversation history for continuity (already includes current user message)
for msg in history[-20:]:
messages.append(msg)
# Inject command as system guidance after the user message
messages.append({"role": "system", "content": f"Input node command: {command.instruction}"})
await self.hud("context", messages=messages)
# Stream response
client, resp = await llm_call(self.model, messages, stream=True)
full_response = ""
try:
async for line in resp.aiter_lines():
if not line.startswith("data: "):
continue
payload = line[6:]
if payload == "[DONE]":
break
chunk = json.loads(payload)
delta = chunk["choices"][0].get("delta", {})
token = delta.get("content", "")
if token:
full_response += token
await ws.send_text(json.dumps({"type": "delta", "content": token}))
finally:
await resp.aclose()
await client.aclose()
log.info(f"[output] response: {full_response[:100]}...")
await ws.send_text(json.dumps({"type": "done"}))
await self.hud("done")
return full_response
# --- Memorizer Node (S2 — shared state / coordination) ---
class MemorizerNode(Node):
name = "memorizer"
model = "google/gemini-2.0-flash-001"
DISTILL_SYSTEM = """You are the Memorizer node of a cognitive agent runtime.
After each exchange you update the shared state that Input and Output nodes read.
Given the conversation so far, output a JSON object with these fields:
- user_name: string how the user identifies themselves (null if unknown)
- user_mood: string current emotional tone (neutral, happy, frustrated, playful, etc.)
- topic: string what the conversation is about right now
- topic_history: list of strings previous topics in this session
- situation: string social/physical context if mentioned (e.g. "at a pub with tina", "private dev session")
- language: string primary language being used (en, de, mixed)
- style_hint: string how Output should talk (casual, formal, technical, poetic, etc.)
- facts: list of strings important facts learned about the user
Output ONLY valid JSON. No explanation, no markdown fences."""
def __init__(self, send_hud):
super().__init__(send_hud)
# The shared state — starts empty, grows over conversation
self.state: dict = {
"user_name": None,
"user_mood": "neutral",
"topic": None,
"topic_history": [],
"situation": "localhost test runtime, private dev session",
"language": "en",
"style_hint": "casual, technical",
"facts": [],
}
def get_context_block(self) -> str:
"""Returns a formatted string for injection into Input/Output system prompts."""
lines = ["Shared memory (from Memorizer):"]
for k, v in self.state.items():
if v:
lines.append(f"- {k}: {v}")
return "\n".join(lines)
async def update(self, history: list[dict]):
"""Distill conversation into updated shared state. Called after each exchange."""
if len(history) < 2:
await self.hud("updated", state=self.state) # emit default state
return
await self.hud("thinking", detail="updating shared state")
messages = [
{"role": "system", "content": self.DISTILL_SYSTEM},
{"role": "system", "content": f"Current state: {json.dumps(self.state)}"},
]
# Last few exchanges for distillation
for msg in history[-10:]:
messages.append(msg)
messages.append({"role": "user", "content": "Update the shared state based on this conversation. Output JSON only."})
await self.hud("context", messages=messages)
raw = await llm_call(self.model, messages)
log.info(f"[memorizer] raw: {raw[:200]}")
# Parse JSON from response (strip markdown fences if present)
text = raw.strip()
if text.startswith("```"):
text = text.split("\n", 1)[1] if "\n" in text else text[3:]
if text.endswith("```"):
text = text[:-3]
text = text.strip()
try:
new_state = json.loads(text)
# Merge: keep old facts, add new ones
old_facts = set(self.state.get("facts", []))
new_facts = set(new_state.get("facts", []))
new_state["facts"] = list(old_facts | new_facts)
# Preserve topic history
if self.state.get("topic") and self.state["topic"] != new_state.get("topic"):
hist = new_state.get("topic_history", [])
if self.state["topic"] not in hist:
hist.append(self.state["topic"])
new_state["topic_history"] = hist[-5:] # keep last 5
self.state = new_state
log.info(f"[memorizer] updated state: {self.state}")
await self.hud("updated", state=self.state)
except (json.JSONDecodeError, Exception) as e:
log.error(f"[memorizer] update error: {e}, raw: {text[:200]}")
await self.hud("error", detail=f"Update failed: {e}")
# Still emit current state so frontend shows something
await self.hud("updated", state=self.state)
# --- Runtime (wires nodes together) ---
TRACE_FILE = Path(__file__).parent / "trace.jsonl"
class Runtime:
def __init__(self, ws: WebSocket, user_claims: dict = None, origin: str = ""):
self.ws = ws
self.history: list[dict] = []
self.input_node = InputNode(send_hud=self._send_hud)
self.output_node = OutputNode(send_hud=self._send_hud)
self.memorizer = MemorizerNode(send_hud=self._send_hud)
# Verified identity from auth — Input and Memorizer use this
claims = user_claims or {}
log.info(f"[runtime] user_claims: {claims}")
self.identity = claims.get("name") or claims.get("preferred_username") or claims.get("username") or "unknown"
log.info(f"[runtime] resolved identity: {self.identity}")
self.channel = origin or "unknown"
# Seed memorizer with verified info
self.memorizer.state["user_name"] = self.identity
self.memorizer.state["situation"] = f"authenticated on {self.channel}" if origin else "local session"
async def _send_hud(self, data: dict):
# Send to frontend
await self.ws.send_text(json.dumps({"type": "hud", **data}))
# Append to trace file + broadcast to SSE subscribers
trace_entry = {"ts": time.strftime("%Y-%m-%d %H:%M:%S.") + f"{time.time() % 1:.3f}"[2:], **data}
try:
with open(TRACE_FILE, "a", encoding="utf-8") as f:
f.write(json.dumps(trace_entry, ensure_ascii=False) + "\n")
except Exception as e:
log.error(f"trace write error: {e}")
_broadcast_sse(trace_entry)
async def handle_message(self, text: str):
envelope = Envelope(
text=text,
user_id="nico",
session_id="test",
timestamp=time.strftime("%Y-%m-%d %H:%M:%S"),
)
# Append user message to history FIRST — both nodes see it
self.history.append({"role": "user", "content": text})
# Get shared memory context for both nodes
mem_ctx = self.memorizer.get_context_block()
# Input node decides (with memory context + identity + channel)
command = await self.input_node.process(
envelope, self.history, memory_context=mem_ctx,
identity=self.identity, channel=self.channel)
# Output node executes (with memory context + history including user msg)
response = await self.output_node.process(command, self.history, self.ws, memory_context=mem_ctx)
self.history.append({"role": "assistant", "content": response})
# Memorizer updates shared state after each exchange
await self.memorizer.update(self.history)
# --- App ---
STATIC_DIR = Path(__file__).parent / "static"
app = FastAPI(title="Cognitive Agent Runtime")
# Keep a reference to the active runtime for API access
_active_runtime: Runtime | None = None
@app.get("/health")
async def health():
return {"status": "ok"}
@app.get("/auth/config")
async def auth_config():
"""Public: auth config for frontend OIDC flow."""
return {
"enabled": AUTH_ENABLED,
"issuer": ZITADEL_ISSUER,
"clientId": ZITADEL_CLIENT_ID,
"projectId": ZITADEL_PROJECT_ID,
}
@app.websocket("/ws")
async def ws_endpoint(ws: WebSocket, token: str | None = Query(None), access_token: str | None = Query(None)):
global _active_runtime
# Validate auth if enabled
user_claims = {"sub": "anonymous"}
if AUTH_ENABLED and token:
try:
user_claims = await _validate_token(token)
# If id_token lacks name, enrich from userinfo with access_token
if not user_claims.get("name") and access_token:
async with httpx.AsyncClient() as client:
resp = await client.get(f"{ZITADEL_ISSUER}/oidc/v1/userinfo",
headers={"Authorization": f"Bearer {access_token}"})
if resp.status_code == 200:
info = resp.json()
log.info(f"[auth] userinfo enrichment: {info}")
user_claims["name"] = info.get("name")
user_claims["preferred_username"] = info.get("preferred_username")
user_claims["email"] = info.get("email")
except HTTPException:
await ws.close(code=4001, reason="Invalid token")
return
origin = ws.headers.get("origin", ws.headers.get("host", ""))
await ws.accept()
runtime = Runtime(ws, user_claims=user_claims, origin=origin)
_active_runtime = runtime
try:
while True:
data = await ws.receive_text()
msg = json.loads(data)
await runtime.handle_message(msg["text"])
except WebSocketDisconnect:
if _active_runtime is runtime:
_active_runtime = None
# --- API endpoints (for Claude to inspect runtime state) ---
import hashlib
from asyncio import Queue
from starlette.responses import StreamingResponse
# SSE subscribers (for titan/service accounts to watch live)
_sse_subscribers: list[Queue] = []
def _broadcast_sse(event: dict):
"""Push an event to all SSE subscribers."""
for q in _sse_subscribers:
try:
q.put_nowait(event)
except asyncio.QueueFull:
pass # drop if subscriber is too slow
def _state_hash() -> str:
"""Hash of current runtime state — cheap way to detect changes."""
if not _active_runtime:
return "no_session"
raw = json.dumps({
"mem": _active_runtime.memorizer.state,
"hlen": len(_active_runtime.history),
}, sort_keys=True)
return hashlib.md5(raw.encode()).hexdigest()[:12]
@app.get("/api/events")
async def sse_events(user=Depends(require_auth)):
"""SSE stream of runtime events (trace, state changes)."""
q: Queue = Queue(maxsize=100)
_sse_subscribers.append(q)
async def generate():
try:
while True:
event = await q.get()
yield f"data: {json.dumps(event)}\n\n"
except asyncio.CancelledError:
pass
finally:
_sse_subscribers.remove(q)
return StreamingResponse(generate(), media_type="text/event-stream",
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
@app.get("/api/poll")
async def poll(since: str = "", user=Depends(require_auth)):
"""Returns current hash. If 'since' matches, returns {changed: false}. Cheap polling."""
h = _state_hash()
if since and since == h:
return {"changed": False, "hash": h}
return {
"changed": True,
"hash": h,
"state": _active_runtime.memorizer.state if _active_runtime else None,
"history_len": len(_active_runtime.history) if _active_runtime else 0,
"last_messages": _active_runtime.history[-3:] if _active_runtime else [],
}
@app.get("/api/state")
async def get_state(user=Depends(require_auth)):
"""Current memorizer state + history length."""
if not _active_runtime:
return {"status": "no_session"}
return {
"status": "active",
"memorizer": _active_runtime.memorizer.state,
"history_len": len(_active_runtime.history),
}
@app.get("/api/history")
async def get_history(last: int = 10, user=Depends(require_auth)):
"""Recent conversation history."""
if not _active_runtime:
return {"status": "no_session", "messages": []}
return {
"status": "active",
"messages": _active_runtime.history[-last:],
}
@app.get("/api/trace")
async def get_trace(last: int = 30, user=Depends(require_auth)):
"""Recent trace lines from trace.jsonl."""
if not TRACE_FILE.exists():
return {"lines": []}
lines = TRACE_FILE.read_text(encoding="utf-8").strip().split("\n")
parsed = []
for line in lines[-last:]:
try:
parsed.append(json.loads(line))
except json.JSONDecodeError:
pass
return {"lines": parsed}
# Serve index.html explicitly, then static assets
from fastapi.responses import FileResponse
@app.get("/")
async def index():
return FileResponse(STATIC_DIR / "index.html")
@app.get("/callback")
async def callback():
"""OIDC callback — serves the same SPA, JS handles the code exchange."""
return FileResponse(STATIC_DIR / "index.html")
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
if __name__ == "__main__":
import uvicorn
uvicorn.run("agent:app", host="0.0.0.0", port=8000, reload=True)

12
docker-compose.yml Normal file
View File

@ -0,0 +1,12 @@
services:
runtime:
build: .
ports:
- "8000:8000"
volumes:
- ./agent.py:/app/agent.py
- ./static:/app/static
- ./trace.jsonl:/app/trace.jsonl
env_file:
- .env
restart: unless-stopped

25
k8s/cog-ingress.yaml Normal file
View File

@ -0,0 +1,25 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: cog-runtime
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls: "true"
spec:
ingressClassName: traefik
tls:
- secretName: cog-tls
hosts:
- cog.loop42.de
rules:
- host: cog.loop42.de
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: agent-runtime
port:
number: 80

View File

@ -0,0 +1,25 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: zitadel-login
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls: "true"
spec:
ingressClassName: traefik
tls:
- secretName: zitadel-tls
hosts:
- auth.loop42.de
rules:
- host: auth.loop42.de
http:
paths:
- path: /ui/v2/login
pathType: Prefix
backend:
service:
name: zitadel-login
port:
number: 3000

85
k8s/zitadel-values.yaml Normal file
View File

@ -0,0 +1,85 @@
replicaCount: 1
zitadel:
masterkeySecretName: zitadel-masterkey
configmapConfig:
ExternalDomain: auth.loop42.de
ExternalPort: 443
ExternalSecure: true
TLS:
Enabled: false
Database:
Postgres:
Host: zitadel-db-postgresql
Port: 5432
Database: zitadel
MaxOpenConns: 10
MaxIdleConns: 5
MaxConnLifetime: 30m
MaxConnIdleTime: 5m
User:
Username: zitadel
SSL:
Mode: disable
Admin:
Username: postgres
SSL:
Mode: disable
FirstInstance:
Org:
Name: loop42
Human:
UserName: nico
FirstName: Nico
LastName: Zimmermann
NickName: nico
Email:
Address: nico@loop42.de
Verified: true
Password: ChangeMe42!
PasswordChangeRequired: true
secretConfig:
Database:
Postgres:
User:
Password: zitadel-db-pw-42
Admin:
Password: postgres-admin-pw-42
ingress:
enabled: true
className: traefik
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls: "true"
hosts:
- host: auth.loop42.de
paths:
- path: /
pathType: Prefix
tls:
- secretName: zitadel-tls
hosts:
- auth.loop42.de
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 1000m
memory: 512Mi
setupJob:
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 256Mi
postgresql:
enabled: false

8
requirements.txt Normal file
View File

@ -0,0 +1,8 @@
fastapi==0.135.2
uvicorn==0.42.0
httpx==0.28.1
httpx-sse==0.4.3
websockets==16.0
python-dotenv==1.2.2
pydantic==2.12.5
PyJWT[crypto]==2.10.1

229
static/app.js Normal file
View File

@ -0,0 +1,229 @@
const msgs = document.getElementById('messages');
const inputEl = document.getElementById('input');
const statusEl = document.getElementById('status');
const traceEl = document.getElementById('trace');
let ws, currentEl;
let authToken = localStorage.getItem('cog_token');
let authConfig = null;
// --- OIDC Auth ---
async function initAuth() {
try {
const resp = await fetch('/auth/config');
authConfig = await resp.json();
} catch { authConfig = { enabled: false }; }
if (!authConfig.enabled) { connect(); return; }
// Handle OIDC callback
if (location.pathname === '/callback') {
const params = new URLSearchParams(location.search);
const code = params.get('code');
const verifier = sessionStorage.getItem('pkce_verifier');
if (code && verifier) {
const tokenResp = await fetch(authConfig.issuer + '/oauth/v2/token', {
method: 'POST',
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
body: new URLSearchParams({
grant_type: 'authorization_code',
client_id: authConfig.clientId,
code,
redirect_uri: location.origin + '/callback',
code_verifier: verifier,
}),
});
const tokens = await tokenResp.json();
if (tokens.access_token) {
// Store access token for userinfo, id_token for JWT validation
localStorage.setItem('cog_access_token', tokens.access_token);
authToken = tokens.id_token || tokens.access_token;
localStorage.setItem('cog_token', authToken);
sessionStorage.removeItem('pkce_verifier');
}
}
history.replaceState(null, '', '/');
}
if (authToken) {
connect();
} else {
showLogin();
}
}
function showLogin() {
statusEl.textContent = 'not authenticated';
statusEl.style.color = '#f59e0b';
const btn = document.createElement('button');
btn.textContent = 'Log in with loop42';
btn.className = 'login-btn';
btn.onclick = startLogin;
document.getElementById('input-bar').replaceChildren(btn);
}
async function startLogin() {
// PKCE: generate code_verifier + code_challenge
const verifier = randomString(64);
sessionStorage.setItem('pkce_verifier', verifier);
const encoder = new TextEncoder();
const digest = await crypto.subtle.digest('SHA-256', encoder.encode(verifier));
const challenge = btoa(String.fromCharCode(...new Uint8Array(digest)))
.replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, '');
const params = new URLSearchParams({
response_type: 'code',
client_id: authConfig.clientId,
redirect_uri: location.origin + '/callback',
scope: 'openid profile email',
code_challenge: challenge,
code_challenge_method: 'S256',
});
location.href = authConfig.issuer + '/oauth/v2/authorize?' + params;
}
function randomString(len) {
const arr = new Uint8Array(len);
crypto.getRandomValues(arr);
return btoa(String.fromCharCode(...arr)).replace(/[^a-zA-Z0-9]/g, '').slice(0, len);
}
// --- WebSocket ---
function connect() {
const proto = location.protocol === 'https:' ? 'wss:' : 'ws:';
let wsUrl = proto + '//' + location.host + '/ws';
if (authToken) {
const accessToken = localStorage.getItem('cog_access_token') || '';
wsUrl += '?token=' + encodeURIComponent(authToken) + '&access_token=' + encodeURIComponent(accessToken);
}
ws = new WebSocket(wsUrl);
ws.onopen = () => {
statusEl.textContent = 'connected';
statusEl.style.color = '#22c55e';
addTrace('runtime', 'connected', 'ws open');
};
ws.onclose = () => {
statusEl.textContent = 'disconnected';
statusEl.style.color = '#666';
addTrace('runtime', 'disconnected', 'ws closed');
setTimeout(connect, 2000);
};
ws.onmessage = (e) => {
const data = JSON.parse(e.data);
if (data.type === 'hud') {
handleHud(data);
} else if (data.type === 'delta') {
if (!currentEl) {
currentEl = addMsg('assistant', '');
currentEl.classList.add('streaming');
}
currentEl.textContent += data.content;
scroll(msgs);
} else if (data.type === 'done') {
if (currentEl) currentEl.classList.remove('streaming');
currentEl = null;
}
};
}
function handleHud(data) {
const node = data.node || 'unknown';
const event = data.event || '';
if (event === 'context') {
// Expandable: show message count, click to see full context
const count = (data.messages || []).length;
const summary = count + ' msgs: ' + (data.messages || []).map(m =>
m.role[0].toUpperCase() + ':' + truncate(m.content, 30)
).join(' | ');
const detail = (data.messages || []).map((m, i) =>
i + ' [' + m.role + '] ' + m.content
).join('\n');
addTrace(node, 'context', summary, 'context', detail);
} else if (event === 'decided') {
addTrace(node, 'decided', data.instruction, 'instruction');
} else if (event === 'updated' && data.state) {
const pairs = Object.entries(data.state).map(([k, v]) => {
const val = Array.isArray(v) ? v.join(', ') : (v || 'null');
return k + '=' + truncate(val, 25);
}).join(' ');
const detail = JSON.stringify(data.state, null, 2);
addTrace(node, 'state', pairs, 'state', detail);
} else if (event === 'error') {
addTrace(node, 'error', data.detail || '', 'error');
} else if (event === 'thinking') {
addTrace(node, 'thinking', data.detail || '');
} else if (event === 'streaming') {
addTrace(node, 'streaming', '');
} else if (event === 'done') {
addTrace(node, 'done', '');
} else {
// Generic fallback
const detail = JSON.stringify(data, null, 2);
addTrace(node, event, '', '', detail);
}
}
function addTrace(node, event, text, cls, detail) {
const line = document.createElement('div');
line.className = 'trace-line' + (detail ? ' expandable' : '');
const ts = new Date().toLocaleTimeString('de-DE', { hour12: false, hour: '2-digit', minute: '2-digit', second: '2-digit', fractionalSecondDigits: 1 });
line.innerHTML =
'<span class="trace-ts">' + ts + '</span>' +
'<span class="trace-node ' + esc(node) + '">' + esc(node) + '</span>' +
'<span class="trace-event">' + esc(event) + '</span>' +
'<span class="trace-data' + (cls ? ' ' + cls : '') + '">' + esc(text) + '</span>';
traceEl.appendChild(line);
if (detail) {
const detailEl = document.createElement('div');
detailEl.className = 'trace-detail';
detailEl.textContent = detail;
traceEl.appendChild(detailEl);
line.addEventListener('click', () => detailEl.classList.toggle('open'));
}
scroll(traceEl);
}
function scroll(el) { el.scrollTop = el.scrollHeight; }
function esc(s) { const d = document.createElement('span'); d.textContent = s; return d.innerHTML; }
function truncate(s, n) { return s.length > n ? s.slice(0, n) + '\u2026' : s; }
function addMsg(role, text) {
const el = document.createElement('div');
el.className = 'msg ' + role;
el.textContent = text;
msgs.appendChild(el);
scroll(msgs);
return el;
}
function send() {
const text = inputEl.value.trim();
if (!text || !ws || ws.readyState !== 1) return;
addMsg('user', text);
addTrace('runtime', 'user_msg', truncate(text, 60));
ws.send(JSON.stringify({ text }));
inputEl.value = '';
}
inputEl.addEventListener('keydown', (e) => { if (e.key === 'Enter') send(); });
initAuth();

483
static/design.html Normal file
View File

@ -0,0 +1,483 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Cognitive Runtime — Design Exploration</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #d4d4d4; line-height: 1.6; }
.page { max-width: 1400px; margin: 0 auto; padding: 2rem; }
h1 { color: #f59e0b; font-size: 1.6rem; margin-bottom: 0.5rem; }
h2 { color: #60a5fa; font-size: 1.2rem; margin: 2rem 0 0.75rem; border-bottom: 1px solid #222; padding-bottom: 0.3rem; }
h3 { color: #34d399; font-size: 1rem; margin: 1.2rem 0 0.5rem; }
p, li { font-size: 0.9rem; }
ul { padding-left: 1.2rem; margin: 0.3rem 0; }
li { margin: 0.2rem 0; }
.subtitle { color: #888; font-size: 0.85rem; margin-bottom: 2rem; }
code { background: #1a1a2e; padding: 0.1rem 0.4rem; border-radius: 0.2rem; font-size: 0.85rem; color: #a78bfa; }
/* Three-column sections */
.three-col { display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 1rem; margin: 1rem 0; }
.card { background: #111; border: 1px solid #222; border-radius: 0.5rem; padding: 1rem; }
.card h3 { margin-top: 0; }
.card.amber { border-color: #f59e0b33; }
.card.blue { border-color: #60a5fa33; }
.card.green { border-color: #34d39933; }
.card.purple { border-color: #a78bfa33; }
.card.red { border-color: #ef444433; }
/* Graph visualization */
.graph { background: #0f0f1a; border: 1px solid #222; border-radius: 0.5rem; padding: 1.5rem; margin: 1rem 0; font-family: monospace; font-size: 0.8rem; white-space: pre; line-height: 1.4; overflow-x: auto; }
.graph .sensor { color: #f59e0b; }
.graph .controller { color: #60a5fa; }
.graph .effector { color: #34d399; }
.graph .memory { color: #a78bfa; }
.graph .feedback { color: #f472b6; }
.graph .arrow { color: #555; }
/* Test cases table */
table { width: 100%; border-collapse: collapse; margin: 1rem 0; font-size: 0.85rem; }
th { text-align: left; padding: 0.5rem; background: #1a1a2e; color: #60a5fa; border-bottom: 2px solid #333; }
td { padding: 0.5rem; border-bottom: 1px solid #1a1a1a; }
tr:hover td { background: #111; }
.tag { display: inline-block; padding: 0.1rem 0.4rem; border-radius: 0.2rem; font-size: 0.7rem; font-weight: 600; }
.tag.now { background: #22c55e22; color: #22c55e; }
.tag.next { background: #f59e0b22; color: #f59e0b; }
.tag.later { background: #60a5fa22; color: #60a5fa; }
.tag.node { background: #a78bfa22; color: #a78bfa; }
/* Roadmap */
.phase { display: flex; gap: 1rem; align-items: flex-start; margin: 0.75rem 0; padding: 0.75rem; background: #111; border-radius: 0.5rem; border-left: 3px solid #333; }
.phase.active { border-left-color: #22c55e; }
.phase.planned { border-left-color: #f59e0b; }
.phase.future { border-left-color: #60a5fa; }
.phase-num { font-size: 1.5rem; font-weight: 700; color: #333; min-width: 2rem; }
.phase.active .phase-num { color: #22c55e; }
.phase.planned .phase-num { color: #f59e0b; }
.phase.future .phase-num { color: #60a5fa; }
</style>
</head>
<body>
<div class="page">
<h1>Cognitive Agent Runtime — Design Exploration</h1>
<div class="subtitle">Node graph architecture grounded in cybernetics, Cynefin, actor-network theory, and signal processing</div>
<!-- ================================================================== -->
<h2>Theoretical Grounding</h2>
<div class="three-col">
<div class="card amber">
<h3>Cybernetics (Wiener, Ashby, Beer)</h3>
<p><strong>Core idea:</strong> systems that regulate themselves through feedback loops.</p>
<ul>
<li><strong>Ashby's Law of Requisite Variety</strong> — the controller must have at least as much variety as the disturbance. One monolithic agent fails because it can't match the variety of all inputs. Specialized nodes CAN.</li>
<li><strong>Viable System Model (Beer)</strong> — every viable system has 5 subsystems: Operations (Output), Coordination (Router), Control (Input), Intelligence (Thinker), Policy (human/config). Our graph maps directly.</li>
<li><strong>Homeostasis</strong> — the system maintains stability through feedback. The I/O Feedback node IS the homeostatic loop — detecting drift, repeated failures, frustration.</li>
<li><strong>Circular causality</strong> — output affects input. The user's next message is shaped by the response. The graph must be a loop, not a pipeline.</li>
</ul>
<p style="margin-top:0.5rem;color:#f59e0b;font-size:0.8rem;"><strong>Design takeaway:</strong> Every node is either a sensor, controller, or effector. Missing any breaks the feedback loop.</p>
</div>
<div class="card blue">
<h3>Cynefin (Snowden)</h3>
<p><strong>Core idea:</strong> different problem domains need different response strategies.</p>
<ul>
<li><strong>Clear</strong> — "what time is it?" → sense-categorize-respond. Input routes directly to Output, no Thinker needed. Fast.</li>
<li><strong>Complicated</strong> — "how do I deploy to K3s?" → sense-analyze-respond. Input routes to Thinker with tools. Expert knowledge.</li>
<li><strong>Complex</strong> — "should we use microservices?" → probe-sense-respond. Thinker explores, Memorizer tracks evolving understanding. No single right answer.</li>
<li><strong>Chaotic</strong> — system is down, user panicking → act-sense-respond. Output responds FIRST (acknowledge), then Input figures out what happened.</li>
<li><strong>Confused</strong> — unclear what domain we're in → Input's primary job! Classify before routing.</li>
</ul>
<p style="margin-top:0.5rem;color:#60a5fa;font-size:0.8rem;"><strong>Design takeaway:</strong> Input node IS the Cynefin classifier. Different domains = different graph paths.</p>
</div>
<div class="card green">
<h3>Actor-Network Theory (Latour)</h3>
<p><strong>Core idea:</strong> capability emerges from the network, not individual actors.</p>
<ul>
<li><strong>Actants</strong> — both human and non-human entities have agency. Each node is an actant. The user is an actant. The LLM API is an actant.</li>
<li><strong>Translation</strong> — messages change form as they pass through the network. User text → envelope → command → LLM prompt → stream → display. Each node translates.</li>
<li><strong>Irreducibility</strong> — you can't reduce the system to one actor. No single node "is" the agent. The GRAPH is the agent.</li>
<li><strong>Enrollment</strong> — new nodes join the network as needed. Tool nodes, sensor nodes, human-in-the-loop nodes. The graph grows.</li>
</ul>
<p style="margin-top:0.5rem;color:#34d399;font-size:0.8rem;"><strong>Design takeaway:</strong> The graph IS the intelligence. Nodes are replaceable. Edges are where meaning happens.</p>
</div>
</div>
<!-- ================================================================== -->
<h2>Node Taxonomy (from signal processing + cybernetics)</h2>
<div class="three-col">
<div class="card amber">
<h3>Sensors (perceive)</h3>
<ul>
<li><strong>Input</strong> — user message sensor</li>
<li><strong>Timer</strong> — periodic trigger (cron, polling)</li>
<li><strong>Webhook</strong> — external event sensor</li>
<li><strong>FileWatch</strong> — filesystem change sensor</li>
<li><strong>SystemProbe</strong> — health/load sensor</li>
</ul>
<p style="color:#888;font-size:0.75rem;margin-top:0.5rem;">Like a webcam node in TouchDesigner. Always on, emits when something happens.</p>
</div>
<div class="card blue">
<h3>Controllers (decide + transform)</h3>
<ul>
<li><strong>Classifier</strong> — categorize input (Cynefin domain, intent, tone)</li>
<li><strong>Router</strong> — direct to different paths based on classification</li>
<li><strong>Thinker</strong> — deep reasoning, tool use</li>
<li><strong>Filter</strong> — reduce, summarize, extract</li>
<li><strong>Accumulator</strong> — aggregate over time (topic tracker)</li>
</ul>
<p style="color:#888;font-size:0.75rem;margin-top:0.5rem;">Like filter/transform nodes in Max/MSP. Shape the signal.</p>
</div>
<div class="card green">
<h3>Effectors (act)</h3>
<ul>
<li><strong>Output</strong> — stream text to user</li>
<li><strong>Feedback</strong> — emit HUD/status events</li>
<li><strong>ToolExec</strong> — execute external tools</li>
<li><strong>Writer</strong> — persist to storage</li>
<li><strong>Notifier</strong> — push to external systems</li>
</ul>
<p style="color:#888;font-size:0.75rem;margin-top:0.5rem;">Like output nodes in Unreal Blueprints. Make something happen in the world.</p>
</div>
</div>
<div class="card purple" style="margin:1rem 0;">
<h3>Memory (special: both reads and writes)</h3>
<ul>
<li><strong>Memorizer</strong> — working memory, session history, user facts (SQLite/JSON)</li>
<li><strong>TopicTracker</strong> — maintains list of active conversation topics</li>
<li><strong>ContextBuilder</strong> — assembles the right context for each node on demand</li>
</ul>
<p style="color:#888;font-size:0.75rem;margin-top:0.5rem;">Memory nodes are unique: they're called BY other nodes, not just wired in sequence. They're shared state — the "blackboard" in classic AI.</p>
</div>
<!-- ================================================================== -->
<h2>Graph Architecture — Full Vision</h2>
<div class="graph">
<span class="arrow"> ┌─────────────────────────────────────────────────┐</span>
<span class="arrow"> │ │</span>
<span class="arrow"> ▼ │</span>
<span class="sensor"> [User WS] ──► [Input/Classifier]</span><span class="arrow"> ──┬──────────────────────────┐ │</span>
<span class="arrow"> │ │ │ │</span>
<span class="arrow"> │ ┌─────────┘ │ │</span>
<span class="arrow"> │ │ </span><span class="feedback">Cynefin routing</span><span class="arrow"> │ │</span>
<span class="arrow"> │ │ │ │</span>
<span class="arrow"> </span><span class="feedback">Clear:</span><span class="arrow"> │ │ </span><span class="controller">Complicated/Complex:</span><span class="arrow"> │ │</span>
<span class="arrow"> (skip │ │ ▼ │</span>
<span class="arrow"> thinker) │ ▼ </span><span class="memory">[Memorizer]</span><span class="arrow"></span>
<span class="arrow"></span><span class="controller">[Thinker]</span><span class="arrow"> ◄── context ──────── </span><span class="memory"></span><span class="arrow"></span>
<span class="arrow"> │ │ │ </span><span class="memory"></span><span class="arrow"></span>
<span class="arrow"> │ │ └──── memory updates ──────── </span><span class="memory"></span><span class="arrow"></span>
<span class="arrow"> │ │ │ │ │</span>
<span class="arrow"> │ │ └──► </span><span class="effector">[ToolExec]</span><span class="arrow"> ─── results ──► │ │</span>
<span class="arrow"> │ │ │</span>
<span class="arrow"> ▼ ▼ │</span>
<span class="arrow"> </span><span class="effector">[Output]</span><span class="arrow"> ──► [User WS] (stream delta/done) │</span>
<span class="arrow"> │ │</span>
<span class="arrow"> └──► </span><span class="feedback">[Feedback]</span><span class="arrow"> ──► [User WS] (hud events) │</span>
<span class="arrow"> │ │</span>
<span class="arrow"> └──► </span><span class="feedback">[I/O Monitor]</span><span class="arrow"> ── hints ──────────┘</span>
<span class="arrow"></span>
<span class="sensor"> [Timer] ──────────────────────────────►│</span>
<span class="sensor"> [Webhook] ────────────────────────────►│</span>
<span class="sensor"> [SystemProbe] ────────────────────────►│</span>
<span class="arrow"></span>
<span class="arrow"></span>
<span class="arrow"> </span><span class="memory">[TopicTracker]</span><span class="arrow"> ──► [UI: topic list, action buttons]</span>
</div>
<!-- ================================================================== -->
<h2>Node-Based Programming Analogy</h2>
<div class="three-col">
<div class="card">
<h3>TouchDesigner / Max/MSP</h3>
<ul>
<li>Webcam → filter → skeleton detector → output display</li>
<li><strong>Always running</strong> — not request/response</li>
<li>Nodes have typed inputs/outputs</li>
<li>Graph is the program</li>
</ul>
</div>
<div class="card">
<h3>ComfyUI (Stable Diffusion)</h3>
<ul>
<li>Prompt → CLIP → sampler → VAE → image</li>
<li>Each node: one model, one job</li>
<li>Swap nodes to change behavior</li>
<li>Visual graph = full transparency</li>
</ul>
</div>
<div class="card">
<h3>Our Cognitive Runtime</h3>
<ul>
<li>User msg → classify → think → stream → display</li>
<li>Each node: one LLM (or none), one job</li>
<li>Swap models per node via config</li>
<li><strong>Three-column view = our visual debugger</strong></li>
</ul>
</div>
</div>
<p style="color:#f59e0b;margin:1rem 0;">Key insight: like node-based visual programming, the graph runs <strong>continuously</strong>. Sensors fire, signals propagate, effectors act. The chat is just ONE sensor. Timer events, webhooks, system probes — all feed the same graph.</p>
<!-- ================================================================== -->
<h2>10 Test Use Cases</h2>
<table>
<tr><th>#</th><th>Use Case</th><th>Tests</th><th>Nodes Needed</th><th>Phase</th></tr>
<tr>
<td>1</td>
<td><strong>Greeting</strong> — "hey!"</td>
<td>Input classifies casual, Output responds warmly. Verify command + context visible in panels.</td>
<td><span class="tag node">Input</span> <span class="tag node">Output</span></td>
<td><span class="tag now">NOW</span></td>
</tr>
<tr>
<td>2</td>
<td><strong>Technical question</strong> — "how does asyncio.Queue work?"</td>
<td>Input classifies knowledge-needed. Output gives detailed answer. Context panel shows history growth.</td>
<td><span class="tag node">Input</span> <span class="tag node">Output</span></td>
<td><span class="tag now">NOW</span></td>
</tr>
<tr>
<td>3</td>
<td><strong>Multi-turn follow-up</strong> — ask, then "tell me more"</td>
<td>Input sees follow-up pattern. Output uses history for continuity. Watch context grow in both panels.</td>
<td><span class="tag node">Input</span> <span class="tag node">Output</span></td>
<td><span class="tag now">NOW</span></td>
</tr>
<tr>
<td>4</td>
<td><strong>Tone shift</strong> — friendly then frustrated "this is broken!"</td>
<td>Input detects tone change, adjusts command. Output shifts from casual to empathetic/helpful.</td>
<td><span class="tag node">Input</span> <span class="tag node">Output</span></td>
<td><span class="tag now">NOW</span></td>
</tr>
<tr>
<td>5</td>
<td><strong>Memory persistence</strong> — "my name is Nico" ... later ... "what's my name?"</td>
<td>Memorizer stores user fact. On later question, provides context to Thinker. Output answers correctly.</td>
<td><span class="tag node">Input</span> <span class="tag node">Memorizer</span> <span class="tag node">Output</span></td>
<td><span class="tag next">NEXT</span></td>
</tr>
<tr>
<td>6</td>
<td><strong>Background monitoring</strong> — "watch CPU load, alert if &gt;80%"</td>
<td>Timer/SystemProbe sensor fires periodically. Input classifies as monitoring. Feedback emits to UI without chat message.</td>
<td><span class="tag node">Timer</span> <span class="tag node">SystemProbe</span> <span class="tag node">Feedback</span></td>
<td><span class="tag later">LATER</span></td>
</tr>
<tr>
<td>7</td>
<td><strong>System unresponsive</strong> — LLM takes 30s+</td>
<td>Feedback node shows "thinking..." immediately. Timeout handling. User sees activity, not silence.</td>
<td><span class="tag node">Input</span> <span class="tag node">Feedback</span> <span class="tag node">Output</span></td>
<td><span class="tag next">NEXT</span></td>
</tr>
<tr>
<td>8</td>
<td><strong>Cynefin: Clear domain</strong> — "what's 2+2?"</td>
<td>Input classifies as Clear, skips Thinker, routes directly to Output. Faster response, cheaper.</td>
<td><span class="tag node">Input</span> <span class="tag node">Router</span> <span class="tag node">Output</span></td>
<td><span class="tag later">LATER</span></td>
</tr>
<tr>
<td>9</td>
<td><strong>Topic tracking</strong> — conversation covers 3 topics, user says "back to the first thing"</td>
<td>TopicTracker accumulates topics. UI shows topic list. User can click to refocus. Memorizer provides relevant context.</td>
<td><span class="tag node">TopicTracker</span> <span class="tag node">Memorizer</span> <span class="tag node">UI</span></td>
<td><span class="tag later">LATER</span></td>
</tr>
<tr>
<td>10</td>
<td><strong>UI action</strong> — button in top bar triggers "summarize this conversation"</td>
<td>Non-chat input (button click) enters the graph as a sensor event. Input classifies, routes to Thinker+Memorizer. Output renders summary.</td>
<td><span class="tag node">UI Sensor</span> <span class="tag node">Thinker</span> <span class="tag node">Memorizer</span></td>
<td><span class="tag later">LATER</span></td>
</tr>
</table>
<!-- ================================================================== -->
<h2>Build Roadmap — What To Validate</h2>
<div class="phase active">
<div class="phase-num">1</div>
<div>
<h3 style="color:#22c55e;margin:0;">Input + Output (NOW — we're here)</h3>
<p><strong>Validates:</strong> Two-node communication works. LLM-to-LLM command passing. Context isolation visible. Streaming through graph.</p>
<p><strong>Test cases:</strong> #1-4 (greeting, technical, follow-up, tone shift)</p>
<p><strong>Success = </strong> You can see Input's reasoning and Output's execution as separate contexts. The command is meaningful, not just pass-through.</p>
</div>
</div>
<div class="phase planned">
<div class="phase-num">2</div>
<div>
<h3 style="color:#f59e0b;margin:0;">+ Feedback Node (pure Python, no LLM)</h3>
<p><strong>Validates:</strong> Non-LLM nodes in the graph. Real-time status. Parallel event emission (HUD fires while Output streams).</p>
<p><strong>Test cases:</strong> #7 (unresponsive system — user sees "thinking..." not silence)</p>
<p><strong>Success = </strong> User always sees activity within 200ms. Feedback panel in the UI shows event timeline.</p>
</div>
</div>
<div class="phase planned">
<div class="phase-num">3</div>
<div>
<h3 style="color:#f59e0b;margin:0;">+ Memorizer Node (LLM: Gemini Flash + SQLite)</h3>
<p><strong>Validates:</strong> Persistent state across the graph. Node-to-node context requests (Thinker asks Memorizer for context). Three LLM calls per turn, each with different context.</p>
<p><strong>Test cases:</strong> #5 (memory persistence — "what's my name?")</p>
<p><strong>Success = </strong> Conversation survives page reload. Memorizer panel shows what it stores vs what it provides.</p>
</div>
</div>
<div class="phase future">
<div class="phase-num">4</div>
<div>
<h3 style="color:#60a5fa;margin:0;">+ Timer Sensor + SystemProbe (always-on)</h3>
<p><strong>Validates:</strong> The graph runs without user input. Sensor-driven processing. Background monitoring. The system is ALIVE, not just reactive.</p>
<p><strong>Test cases:</strong> #6 (background CPU monitoring)</p>
<p><strong>Success = </strong> UI shows activity without chat. Alerts appear. The graph is a living system, not a request/response pipe.</p>
</div>
</div>
<div class="phase future">
<div class="phase-num">5</div>
<div>
<h3 style="color:#60a5fa;margin:0;">+ Router + Cynefin Classification</h3>
<p><strong>Validates:</strong> Multi-path graph. Input classifies domain, routes differently. Clear = fast path (skip Thinker). Complex = deep path (Thinker + tools + Memorizer). The graph adapts to the problem.</p>
<p><strong>Test cases:</strong> #8 (Clear domain — fast path)</p>
<p><strong>Success = </strong> Simple questions are 3x faster. Complex questions get deeper treatment. Visible in the graph view.</p>
</div>
</div>
<div class="phase future">
<div class="phase-num">6</div>
<div>
<h3 style="color:#60a5fa;margin:0;">+ UI Extensions (topic list, action buttons, productivity)</h3>
<p><strong>Validates:</strong> The graph doesn't just do chat. Non-chat inputs (buttons, lists) enter the graph. Non-chat outputs (topic sidebar, action bar) exit the graph. Full cybernetic loop with rich UI.</p>
<p><strong>Test cases:</strong> #9 (topic tracking), #10 (UI button triggers graph)</p>
<p><strong>Success = </strong> The agent is a workspace tool, not just a chatbot.</p>
</div>
</div>
<!-- ================================================================== -->
<h2>Viable System Model (Beer) — The Graph as Organism</h2>
<div class="three-col">
<div class="card green">
<h3>System 1 — Operations</h3>
<p>The worker nodes doing actual work.</p>
<ul>
<li><strong>Thinker</strong> — reasoning, tool calls</li>
<li><strong>Output</strong> — response generation</li>
<li><strong>ToolExec</strong> — external actions</li>
</ul>
</div>
<div class="card blue">
<h3>System 2 — Coordination</h3>
<p>Prevents conflicts between System 1 nodes.</p>
<ul>
<li><strong>Router</strong> — sequencing, dedup</li>
<li><strong>ContextBuilder</strong> — shared context</li>
<li>Message queue ordering</li>
</ul>
</div>
<div class="card amber">
<h3>System 3 — Control</h3>
<p>Monitors performance, enforces policies.</p>
<ul>
<li><strong>Feedback</strong> — quality gates</li>
<li>Token budget manager</li>
<li>Rate limiter, safety filter</li>
</ul>
</div>
</div>
<div class="three-col">
<div class="card purple">
<h3>System 4 — Intelligence</h3>
<p>Looks outward + forward. Adapts.</p>
<ul>
<li><strong>I/O Monitor</strong> — pattern detection</li>
<li>Learns from failures</li>
<li>Adapts routing rules over time</li>
</ul>
</div>
<div class="card red">
<h3>System 5 — Identity</h3>
<p>What the graph IS and won't do.</p>
<ul>
<li>System prompts, persona</li>
<li>Safety boundaries</li>
<li><code>graph.md</code> config</li>
</ul>
</div>
<div class="card" style="border-color:#55555533;">
<h3>The Human Node</h3>
<p>User isn't outside the system — they're an actant (ANT).</p>
<ul>
<li>Messages = sensor signals</li>
<li>Approvals = gate controls</li>
<li>Corrections = feedback loops</li>
<li>The graph includes the human</li>
</ul>
</div>
</div>
<!-- ================================================================== -->
<h2>Design Principles (Synthesis)</h2>
<div class="three-col">
<div class="card amber">
<h3>1. Three-Layer Architecture</h3>
<p><em>From cybernetics</em></p>
<p>Perception (sensors, classifiers) → Decision (routers, controllers) → Action (processors, effectors). Feedback from action back to perception. Missing any layer breaks the loop.</p>
</div>
<div class="card blue">
<h3>2. Invest in Translations</h3>
<p><em>From ANT</em></p>
<p>Message schemas and inter-node protocols matter MORE than individual node intelligence. A mediocre LLM with excellent routing outperforms a brilliant LLM with bad routing. Capability is emergent from topology.</p>
</div>
<div class="card green">
<h3>3. Mixed Compute</h3>
<p><em>From signal processing</em></p>
<p>Only Processor nodes need LLMs. Classifiers, routers, filters, accumulators can be lightweight models or pure code. Keeps cost and latency sane. Reserve expensive calls for where reasoning matters.</p>
</div>
</div>
<div class="three-col">
<div class="card purple">
<h3>4. Typed Event-Driven Connections</h3>
<p><em>From Max/MSP, Blueprints</em></p>
<p>Distinguish <strong>trigger</strong> inputs (fire processing) from <strong>context</strong> inputs (available but passive). Type the message wires. Dampen every feedback cycle (max iterations, circuit breakers).</p>
</div>
<div class="card red">
<h3>5. Requisite Variety</h3>
<p><em>From Ashby</em></p>
<p>Classifier/router layer must distinguish at least as many input types as you have processing strategies. Under-classification = wasted capability. Over-classification = premature complexity.</p>
</div>
<div class="card" style="border-color:#55555533;">
<h3>6. Domain-Aware Routing</h3>
<p><em>From Cynefin</em></p>
<p>Clear = shallow/fast (skip Thinker). Complicated = specialist path. Complex = parallel probes. Chaotic = hardcoded fallback, act first. Different domains = different graph depths.</p>
</div>
</div>
<!-- ================================================================== -->
<h2>The Key Insight</h2>
<div class="card amber" style="margin:1rem 0;border-width:2px;">
<p style="font-size:1rem;color:#f59e0b;font-weight:600;">The graph IS the agent. Not a single LLM with a prompt. Not a chain of API calls. A living, always-on, multi-model network of specialized processors — exactly like a signal processing graph, but for language and reasoning.</p>
<p style="margin-top:0.5rem;">Each phase adds a node. Each node validates one architectural claim. If any claim fails, we learn something. If they all hold, we have a new kind of agent runtime.</p>
<p style="margin-top:0.5rem;color:#888;">The network is the capability (ANT). The variety must match the disturbance (Ashby). The domain determines the strategy (Cynefin). The organism needs all five systems to be viable (Beer).</p>
</div>
</div>
</body>
</html>

33
static/index.html Normal file
View File

@ -0,0 +1,33 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Cognitive Agent Runtime</title>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<div id="top-bar">
<h1>Cognitive Agent Runtime</h1>
<div id="status">disconnected</div>
</div>
<div id="main">
<div class="panel chat-panel">
<div class="panel-header chat-h">Chat</div>
<div id="messages"></div>
<div id="input-bar">
<input id="input" placeholder="Type a message..." autocomplete="off">
<button onclick="send()">Send</button>
</div>
</div>
<div class="panel">
<div class="panel-header trace-h">Trace</div>
<div id="trace"></div>
</div>
</div>
<script src="/static/app.js"></script>
</body>
</html>

56
static/style.css Normal file
View File

@ -0,0 +1,56 @@
/* Global reset: border-box sizing, no default margins/padding. */
* { margin: 0; padding: 0; box-sizing: border-box; }
/* Full-viewport dark layout: top bar stacked above the main grid. */
body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0; height: 100vh; display: flex; flex-direction: column; }
/* Top bar */
#top-bar { display: flex; align-items: center; gap: 1rem; padding: 0.4rem 1rem; background: #111; border-bottom: 1px solid #222; }
#top-bar h1 { font-size: 0.85rem; font-weight: 600; color: #888; }
#status { font-size: 0.75rem; color: #666; }
/* Two-column layout: chat 1/3 | trace 2/3 */
/* The 1px gap over a #222 background draws the divider between panels. */
#main { flex: 1; display: grid; grid-template-columns: 1fr 2fr; gap: 1px; background: #222; overflow: hidden; min-height: 0; }
.panel { background: #0a0a0a; display: flex; flex-direction: column; overflow: hidden; }
.panel-header { padding: 0.5rem 0.75rem; font-size: 0.75rem; font-weight: 700; text-transform: uppercase; letter-spacing: 0.05em; border-bottom: 1px solid #222; flex-shrink: 0; }
.panel-header.chat-h { color: #60a5fa; background: #0a1628; }
.panel-header.trace-h { color: #a78bfa; background: #120a1e; }
/* Chat panel */
.chat-panel { display: flex; flex-direction: column; }
#messages { flex: 1; overflow-y: auto; padding: 0.5rem; display: flex; flex-direction: column; gap: 0.4rem; }
.msg { max-width: 90%; padding: 0.5rem 0.75rem; border-radius: 0.6rem; line-height: 1.4; white-space: pre-wrap; font-size: 0.9rem; }
.msg.user { align-self: flex-end; background: #2563eb; color: white; }
.msg.assistant { align-self: flex-start; background: #1e1e1e; border: 1px solid #333; }
/* Blue border marks the bubble currently receiving streamed deltas. */
.msg.assistant.streaming { border-color: #2563eb; }
/* Input bar */
#input-bar { display: flex; gap: 0.5rem; padding: 0.75rem; background: #111; border-top: 1px solid #222; }
#input { flex: 1; padding: 0.5rem 0.75rem; background: #1a1a1a; color: #e0e0e0; border: 1px solid #333; border-radius: 0.4rem; font-size: 0.9rem; outline: none; }
#input:focus { border-color: #2563eb; }
button { padding: 0.5rem 1rem; background: #2563eb; color: white; border: none; border-radius: 0.4rem; cursor: pointer; font-size: 0.9rem; }
button:hover { background: #1d4ed8; }
/* Trace panel */
#trace { flex: 1; overflow-y: auto; padding: 0.5rem; font-family: 'JetBrains Mono', 'Cascadia Code', 'Fira Code', monospace; font-size: 0.72rem; line-height: 1.5; }
.trace-line { padding: 0.15rem 0.4rem; border-bottom: 1px solid #111; display: flex; gap: 0.5rem; align-items: baseline; }
.trace-line:hover { background: #1a1a2e; }
.trace-ts { color: #555; flex-shrink: 0; min-width: 5rem; }
.trace-node { font-weight: 700; flex-shrink: 0; min-width: 6rem; }
/* Per-node colors; class name is the node name emitted by the runtime. */
.trace-node.input { color: #f59e0b; }
.trace-node.output { color: #34d399; }
.trace-node.memorizer { color: #c084fc; }
.trace-node.runtime { color: #60a5fa; }
.trace-event { color: #888; flex-shrink: 0; min-width: 6rem; }
.trace-data { color: #ccc; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
.trace-data.instruction { color: #22c55e; }
.trace-data.error { color: #ef4444; }
.trace-data.state { color: #c084fc; }
.trace-data.context { color: #666; }
/* Expandable trace detail (hidden until its trace-line is clicked). */
.trace-line.expandable { cursor: pointer; }
.trace-detail { display: none; padding: 0.3rem 0.4rem 0.3rem 12rem; font-size: 0.65rem; color: #777; white-space: pre-wrap; word-break: break-all; max-height: 10rem; overflow-y: auto; background: #0d0d14; border-bottom: 1px solid #1a1a2e; }
.trace-detail.open { display: block; }