cognitive agent runtime v0.4.6: 3-node graph + Zitadel auth + K3s deploy
- Input/Output/Memorizer nodes with OpenRouter (Gemini Flash) - Zitadel OIDC auth with PKCE flow, service token for Titan - SSE event stream + poll endpoint for external observers - Identity from Zitadel userinfo, listener context in Input prompt - Trace logging to file + SSE broadcast - K3s deployment on IONOS with Let's Encrypt TLS - Frontend: chat + trace view, OIDC login Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
commit
569a6022fe
5
.dockerignore
Normal file
5
.dockerignore
Normal file
@ -0,0 +1,5 @@
|
||||
.venv/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
.env
|
||||
trace.jsonl
|
||||
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
.venv/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
.env
|
||||
trace.jsonl
|
||||
12
Dockerfile
Normal file
12
Dockerfile
Normal file
@ -0,0 +1,12 @@
|
||||
# Container image for the cognitive agent runtime (agent.py / FastAPI).
FROM python:3.12-slim

WORKDIR /app

# Install dependencies first so source edits don't bust the pip layer cache.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 8000

CMD ["uvicorn", "agent:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
596
agent.py
Normal file
596
agent.py
Normal file
@ -0,0 +1,596 @@
|
||||
"""
|
||||
Cognitive Agent Runtime — Phase A.2: Three-node graph (Input → Output + Memorizer).
|
||||
Input decides WHAT to do. Output executes and streams.
|
||||
Memorizer holds shared state (S2 — coordination).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Depends, HTTPException, Query
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path(__file__).parent / ".env")
|
||||
|
||||
# --- Config ---

# OpenRouter API key is required — fail fast at import time if it's missing.
API_KEY = os.environ["OPENROUTER_API_KEY"]
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# --- Auth (Zitadel OIDC) ---

# Issuer/client/project defaults point at the loop42 Zitadel instance;
# each can be overridden via environment for other deployments.
ZITADEL_ISSUER = os.environ.get("ZITADEL_ISSUER", "https://auth.loop42.de")
ZITADEL_CLIENT_ID = os.environ.get("ZITADEL_CLIENT_ID", "365996029172056091")
ZITADEL_PROJECT_ID = os.environ.get("ZITADEL_PROJECT_ID", "365995955654230043")
# Auth is opt-in: anything other than the literal string "true" leaves it off.
AUTH_ENABLED = os.environ.get("AUTH_ENABLED", "false").lower() == "true"
# Comma-separated static bearer tokens for machine accounts (e.g. titan).
SERVICE_TOKENS = set(filter(None, os.environ.get("SERVICE_TOKENS", "").split(",")))

# JWKS cache shared by _get_jwks(): refreshed at most once per hour.
_jwks_cache: dict = {"keys": [], "fetched_at": 0}
|
||||
|
||||
async def _get_jwks() -> list:
    """Return Zitadel's JWKS signing keys, cached for up to one hour.

    Raises httpx.HTTPStatusError if the keys endpoint answers with an
    error status — previously a non-200 body would have been cached as
    if it were a valid key set (or crashed on the missing "keys" field).
    """
    if time.time() - _jwks_cache["fetched_at"] < 3600:
        return _jwks_cache["keys"]
    async with httpx.AsyncClient() as client:
        resp = await client.get(f"{ZITADEL_ISSUER}/oauth/v2/keys")
        resp.raise_for_status()  # don't cache an error payload as "keys"
        _jwks_cache["keys"] = resp.json()["keys"]
        _jwks_cache["fetched_at"] = time.time()
    return _jwks_cache["keys"]
|
||||
|
||||
async def _validate_token(token: str) -> dict:
    """Validate a bearer token and return identity claims.

    Order of checks:
      1. static service tokens (machine accounts like titan),
      2. local JWT signature validation against the Zitadel JWKS,
      3. remote userinfo lookup (for opaque access tokens).

    Raises HTTPException(401) if none of the three succeed.
    """
    import base64

    # 1. Check static service tokens (for machine accounts like titan)
    if token in SERVICE_TOKENS:
        return {"sub": "titan", "username": "titan", "source": "service_token"}

    # 2. Try local JWT validation first (three dot-separated segments)
    try:
        parts = token.split(".")
        if len(parts) == 3:
            keys = await _get_jwks()
            # Re-pad base64url: "-len % 4" adds exactly 0-3 '=' characters.
            # (The previous "4 - len % 4" appended 4 pads when none were
            # needed; binascii tolerated it, but it was incorrect padding.)
            header_b64 = parts[0] + "=" * (-len(parts[0]) % 4)
            header = json.loads(base64.urlsafe_b64decode(header_b64))
            kid = header.get("kid")
            key = next((k for k in keys if k["kid"] == kid), None)
            if key:
                import jwt as pyjwt
                from jwt import PyJWK
                jwk_obj = PyJWK(key)
                claims = pyjwt.decode(
                    token, jwk_obj.key, algorithms=["RS256"],
                    issuer=ZITADEL_ISSUER, options={"verify_aud": False},
                )
                return claims
    except Exception as e:
        # Deliberate best-effort: a malformed or foreign JWT falls through
        # to the userinfo check below instead of failing hard. Log it so
        # signature failures are at least visible during debugging.
        log.debug(f"[auth] JWT validation failed, falling back: {e}")

    # 3. Fall back to introspection (for opaque access tokens).
    # Zitadel requires client_id + client_secret or JWT profile for introspection.
    # For a public SPA client, use the project's API app instead.
    # Simplest: check via userinfo endpoint with the token.
    async with httpx.AsyncClient() as client:
        resp = await client.get(
            f"{ZITADEL_ISSUER}/oidc/v1/userinfo",
            headers={"Authorization": f"Bearer {token}"},
        )
        if resp.status_code == 200:
            info = resp.json()
            log.info(f"[auth] userinfo response: {info}")
            return {"sub": info.get("sub"), "preferred_username": info.get("preferred_username"),
                    "email": info.get("email"), "name": info.get("name"), "source": "userinfo"}

    raise HTTPException(status_code=401, detail="Invalid token")
|
||||
|
||||
_bearer = HTTPBearer(auto_error=False)


async def require_auth(credentials: HTTPAuthorizationCredentials | None = Depends(_bearer)):
    """FastAPI dependency: return the caller's claims, enforcing auth when enabled."""
    if not AUTH_ENABLED:
        return {"sub": "anonymous"}
    if credentials is None:
        raise HTTPException(status_code=401, detail="Missing token")
    claims = await _validate_token(credentials.credentials)
    return claims
|
||||
|
||||
async def ws_auth(token: str | None = Query(None)) -> dict:
    """Validate a WebSocket token passed as a query parameter."""
    if not AUTH_ENABLED:
        return {"sub": "anonymous"}
    if token is None:
        # The caller (ws_endpoint) is responsible for rejecting the socket.
        return None
    claims = await _validate_token(token)
    return claims
|
||||
|
||||
# --- LLM helper ---

import logging

# Runtime-wide logger; HH:MM:SS timestamps keep the console output compact.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message)s", datefmt="%H:%M:%S")
log = logging.getLogger("runtime")
|
||||
|
||||
|
||||
async def llm_call(model: str, messages: list[dict], stream: bool = False) -> Any:
    """Single LLM call via OpenRouter.

    Non-streaming: returns the response text, or an "[LLM error: ...]"
    string when the API returns an error payload instead of choices.
    Streaming: returns (client, response); the CALLER owns closing both.
    """
    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
    body = {"model": model, "messages": messages, "stream": stream}

    client = httpx.AsyncClient(timeout=60)
    if stream:
        resp = await client.send(client.build_request("POST", OPENROUTER_URL, headers=headers, json=body), stream=True)
        return client, resp  # caller owns cleanup

    try:
        resp = await client.post(OPENROUTER_URL, headers=headers, json=body)
    finally:
        # FIX: a failed POST previously leaked the client; always close it.
        await client.aclose()
    data = resp.json()
    if "choices" not in data:
        log.error(f"LLM error: {data}")
        return f"[LLM error: {data.get('error', {}).get('message', 'unknown')}]"
    return data["choices"][0]["message"]["content"]
|
||||
|
||||
|
||||
# --- Message types ---


@dataclass
class Envelope:
    """What flows between nodes: one user utterance plus routing metadata."""
    text: str                 # raw user message
    user_id: str = "anon"     # logical sender id
    session_id: str = ""      # conversation/session identifier
    timestamp: str = ""       # "%Y-%m-%d %H:%M:%S" wall clock, set by the caller
|
||||
|
||||
|
||||
@dataclass
class Command:
    """Input node's decision — tells Output what to do."""
    instruction: str  # natural language command for Output LLM
    source_text: str  # original user message (Output may need it)
    metadata: dict = field(default_factory=dict)  # reserved for extra routing data
|
||||
|
||||
|
||||
# --- Base Node ---
|
||||
|
||||
class Node:
|
||||
name: str = "node"
|
||||
model: str | None = None
|
||||
|
||||
def __init__(self, send_hud):
|
||||
self.send_hud = send_hud # async callable to emit hud events to frontend
|
||||
|
||||
async def hud(self, event: str, **data):
|
||||
await self.send_hud({"node": self.name, "event": event, **data})
|
||||
|
||||
|
||||
# --- Input Node ---


class InputNode(Node):
    """The 'ear' of the runtime: turns a user message into a Command."""

    name = "input"
    model = "google/gemini-2.0-flash-001"

    SYSTEM = """You are the Input node — the ear of this cognitive runtime.

Listener context:
- Authenticated user: {identity}
- Channel: {channel} (Chrome browser on Nico's Windows PC, in his room at home)
- Physical: private space, Nico lives with Tina — she may use this session too
- Security: single-user account, shared physical space — other voices are trusted household

You hear what comes through this channel. Emit ONE instruction sentence telling Output how to respond.
No content, just the command.

{memory_context}"""

    async def process(self, envelope: Envelope, history: list[dict], memory_context: str = "",
                      identity: str = "unknown", channel: str = "unknown") -> Command:
        """Decide how Output should respond; return the resulting Command."""
        await self.hud("thinking", detail="deciding how to respond")
        log.info(f"[input] user said: {envelope.text}")

        system_prompt = self.SYSTEM.format(
            memory_context=memory_context, identity=identity, channel=channel)
        # History already includes current user message — don't add it again
        messages = [{"role": "system", "content": system_prompt}, *history[-8:]]

        await self.hud("context", messages=messages)
        instruction = await llm_call(self.model, messages)
        log.info(f"[input] → command: {instruction}")
        await self.hud("decided", instruction=instruction)
        return Command(instruction=instruction, source_text=envelope.text)
|
||||
|
||||
|
||||
# --- Output Node ---


class OutputNode(Node):
    """Executes the Input node's command, streaming tokens over the WebSocket."""

    name = "output"
    model = "google/gemini-2.0-flash-001"

    SYSTEM = """You are the Output node of a cognitive agent runtime.
You receive a command from the Input node telling you HOW to respond, plus the user's original message.
Follow the command's tone and intent. Be natural, don't mention the command or the runtime architecture.
Be concise.

{memory_context}"""

    async def process(self, command: Command, history: list[dict], ws: WebSocket, memory_context: str = "") -> str:
        """Stream the LLM response to `ws` and return the full text."""
        await self.hud("streaming")

        messages = [
            {"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
        ]
        # Conversation history for continuity (already includes current user message)
        for msg in history[-20:]:
            messages.append(msg)
        # Inject command as system guidance after the user message
        messages.append({"role": "system", "content": f"Input node command: {command.instruction}"})

        await self.hud("context", messages=messages)

        # Stream response (SSE lines: "data: {json}", terminated by "data: [DONE]")
        client, resp = await llm_call(self.model, messages, stream=True)
        full_response = ""
        try:
            async for line in resp.aiter_lines():
                if not line.startswith("data: "):
                    continue
                payload = line[6:]
                if payload == "[DONE]":
                    break
                # FIX: skip malformed/irregular chunks instead of killing the
                # stream mid-response — previously a JSONDecodeError or a
                # chunk without "choices" aborted the whole exchange.
                try:
                    chunk = json.loads(payload)
                    delta = chunk["choices"][0].get("delta", {})
                except (json.JSONDecodeError, LookupError):
                    log.warning(f"[output] skipping malformed chunk: {payload[:120]}")
                    continue
                token = delta.get("content", "")
                if token:
                    full_response += token
                    await ws.send_text(json.dumps({"type": "delta", "content": token}))
        finally:
            # Caller-owned cleanup contract from llm_call(stream=True).
            await resp.aclose()
            await client.aclose()

        log.info(f"[output] response: {full_response[:100]}...")
        await ws.send_text(json.dumps({"type": "done"}))
        await self.hud("done")
        return full_response
|
||||
|
||||
|
||||
# --- Memorizer Node (S2 — shared state / coordination) ---


class MemorizerNode(Node):
    """S2 coordination node: distills the conversation into shared state
    that the Input and Output nodes inject into their prompts."""

    name = "memorizer"
    model = "google/gemini-2.0-flash-001"

    DISTILL_SYSTEM = """You are the Memorizer node of a cognitive agent runtime.
After each exchange you update the shared state that Input and Output nodes read.

Given the conversation so far, output a JSON object with these fields:
- user_name: string — how the user identifies themselves (null if unknown)
- user_mood: string — current emotional tone (neutral, happy, frustrated, playful, etc.)
- topic: string — what the conversation is about right now
- topic_history: list of strings — previous topics in this session
- situation: string — social/physical context if mentioned (e.g. "at a pub with tina", "private dev session")
- language: string — primary language being used (en, de, mixed)
- style_hint: string — how Output should talk (casual, formal, technical, poetic, etc.)
- facts: list of strings — important facts learned about the user

Output ONLY valid JSON. No explanation, no markdown fences."""

    def __init__(self, send_hud):
        super().__init__(send_hud)
        # The shared state — starts empty, grows over conversation
        self.state: dict = {
            "user_name": None,
            "user_mood": "neutral",
            "topic": None,
            "topic_history": [],
            "situation": "localhost test runtime, private dev session",
            "language": "en",
            "style_hint": "casual, technical",
            "facts": [],
        }

    def get_context_block(self) -> str:
        """Returns a formatted string for injection into Input/Output system prompts."""
        lines = ["Shared memory (from Memorizer):"]
        for k, v in self.state.items():
            if v:  # falsy values (None, "", [], 0) are omitted
                lines.append(f"- {k}: {v}")
        return "\n".join(lines)

    @staticmethod
    def _strip_fences(raw: str) -> str:
        """Strip leading/trailing markdown code fences from an LLM reply."""
        text = raw.strip()
        if text.startswith("```"):
            # Drop the opening fence line (which may carry a language tag).
            text = text.split("\n", 1)[1] if "\n" in text else text[3:]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    async def update(self, history: list[dict]):
        """Distill conversation into updated shared state. Called after each exchange."""
        if len(history) < 2:
            await self.hud("updated", state=self.state)  # emit default state
            return

        await self.hud("thinking", detail="updating shared state")

        messages = [
            {"role": "system", "content": self.DISTILL_SYSTEM},
            {"role": "system", "content": f"Current state: {json.dumps(self.state)}"},
        ]
        # Last few exchanges for distillation
        for msg in history[-10:]:
            messages.append(msg)
        messages.append({"role": "user", "content": "Update the shared state based on this conversation. Output JSON only."})

        await self.hud("context", messages=messages)

        raw = await llm_call(self.model, messages)
        log.info(f"[memorizer] raw: {raw[:200]}")

        # Parse JSON from response (strip markdown fences if present)
        text = self._strip_fences(raw)

        try:
            new_state = json.loads(text)
            # Merge: keep old facts, add new ones
            old_facts = set(self.state.get("facts", []))
            new_facts = set(new_state.get("facts", []))
            new_state["facts"] = list(old_facts | new_facts)
            # Preserve topic history
            if self.state.get("topic") and self.state["topic"] != new_state.get("topic"):
                hist = new_state.get("topic_history", [])
                if self.state["topic"] not in hist:
                    hist.append(self.state["topic"])
                new_state["topic_history"] = hist[-5:]  # keep last 5
            self.state = new_state
            log.info(f"[memorizer] updated state: {self.state}")
            await self.hud("updated", state=self.state)
        except Exception as e:
            # FIX: the original clause `except (json.JSONDecodeError, Exception)`
            # was redundant — Exception already covers JSONDecodeError.
            log.error(f"[memorizer] update error: {e}, raw: {text[:200]}")
            await self.hud("error", detail=f"Update failed: {e}")
            # Still emit current state so frontend shows something
            await self.hud("updated", state=self.state)
|
||||
|
||||
|
||||
# --- Runtime (wires nodes together) ---

# Append-only trace log; every HUD event also lands here as one JSON line.
TRACE_FILE = Path(__file__).parent / "trace.jsonl"


class Runtime:
    """Wires Input → Output + Memorizer for one WebSocket session."""

    def __init__(self, ws: WebSocket, user_claims: dict = None, origin: str = ""):
        self.ws = ws
        self.history: list[dict] = []
        self.input_node = InputNode(send_hud=self._send_hud)
        self.output_node = OutputNode(send_hud=self._send_hud)
        self.memorizer = MemorizerNode(send_hud=self._send_hud)
        # Verified identity from auth — Input and Memorizer use this
        claims = user_claims or {}
        log.info(f"[runtime] user_claims: {claims}")
        # Prefer human-readable name, then OIDC username, then the
        # service-token "username" field, then "unknown".
        self.identity = claims.get("name") or claims.get("preferred_username") or claims.get("username") or "unknown"
        log.info(f"[runtime] resolved identity: {self.identity}")
        self.channel = origin or "unknown"
        # Seed memorizer with verified info
        self.memorizer.state["user_name"] = self.identity
        self.memorizer.state["situation"] = f"authenticated on {self.channel}" if origin else "local session"

    async def _send_hud(self, data: dict):
        """Fan one HUD event out to: the session WebSocket, trace.jsonl, SSE subscribers."""
        # Send to frontend
        await self.ws.send_text(json.dumps({"type": "hud", **data}))
        # Append to trace file + broadcast to SSE subscribers.
        # Timestamp format: "YYYY-mm-dd HH:MM:SS.mmm" (milliseconds spliced
        # in from the fractional part of time.time()).
        trace_entry = {"ts": time.strftime("%Y-%m-%d %H:%M:%S.") + f"{time.time() % 1:.3f}"[2:], **data}
        try:
            with open(TRACE_FILE, "a", encoding="utf-8") as f:
                f.write(json.dumps(trace_entry, ensure_ascii=False) + "\n")
        except Exception as e:
            # Best-effort: a failing disk must not kill the chat session.
            log.error(f"trace write error: {e}")
        _broadcast_sse(trace_entry)

    async def handle_message(self, text: str):
        """One full exchange: Input decides, Output streams, Memorizer distills."""
        envelope = Envelope(
            text=text,
            user_id="nico",
            session_id="test",
            timestamp=time.strftime("%Y-%m-%d %H:%M:%S"),
        )

        # Append user message to history FIRST — both nodes see it
        self.history.append({"role": "user", "content": text})

        # Get shared memory context for both nodes
        mem_ctx = self.memorizer.get_context_block()

        # Input node decides (with memory context + identity + channel)
        command = await self.input_node.process(
            envelope, self.history, memory_context=mem_ctx,
            identity=self.identity, channel=self.channel)

        # Output node executes (with memory context + history including user msg)
        response = await self.output_node.process(command, self.history, self.ws, memory_context=mem_ctx)
        self.history.append({"role": "assistant", "content": response})

        # Memorizer updates shared state after each exchange
        await self.memorizer.update(self.history)
|
||||
|
||||
|
||||
# --- App ---

STATIC_DIR = Path(__file__).parent / "static"

app = FastAPI(title="Cognitive Agent Runtime")

# Keep a reference to the active runtime for API access.
# NOTE(review): single slot — a second WebSocket connection replaces it.
_active_runtime: Runtime | None = None
|
||||
|
||||
|
||||
@app.get("/health")
async def health():
    """Liveness probe — always reports ok."""
    status = {"status": "ok"}
    return status
|
||||
|
||||
|
||||
@app.get("/auth/config")
async def auth_config():
    """Public: auth config for frontend OIDC flow."""
    config = {
        "enabled": AUTH_ENABLED,
        "issuer": ZITADEL_ISSUER,
        "clientId": ZITADEL_CLIENT_ID,
        "projectId": ZITADEL_PROJECT_ID,
    }
    return config
|
||||
|
||||
|
||||
@app.websocket("/ws")
async def ws_endpoint(ws: WebSocket, token: str | None = Query(None), access_token: str | None = Query(None)):
    """WebSocket chat endpoint: authenticate, then run one Runtime session."""
    global _active_runtime
    # Validate auth if enabled
    user_claims = {"sub": "anonymous"}
    if AUTH_ENABLED:
        # FIX: previously `if AUTH_ENABLED and token:` silently accepted a
        # socket WITHOUT a token as anonymous even when auth was enabled —
        # ws_auth's contract says a missing token must be rejected.
        if not token:
            await ws.close(code=4001, reason="Missing token")
            return
        try:
            user_claims = await _validate_token(token)
            # If id_token lacks name, enrich from userinfo with access_token
            if not user_claims.get("name") and access_token:
                async with httpx.AsyncClient() as client:
                    resp = await client.get(f"{ZITADEL_ISSUER}/oidc/v1/userinfo",
                                            headers={"Authorization": f"Bearer {access_token}"})
                    if resp.status_code == 200:
                        info = resp.json()
                        log.info(f"[auth] userinfo enrichment: {info}")
                        user_claims["name"] = info.get("name")
                        user_claims["preferred_username"] = info.get("preferred_username")
                        user_claims["email"] = info.get("email")
        except HTTPException:
            await ws.close(code=4001, reason="Invalid token")
            return
    origin = ws.headers.get("origin", ws.headers.get("host", ""))
    await ws.accept()
    runtime = Runtime(ws, user_claims=user_claims, origin=origin)
    _active_runtime = runtime
    try:
        # One message in → one full exchange; loop until disconnect.
        while True:
            data = await ws.receive_text()
            msg = json.loads(data)
            await runtime.handle_message(msg["text"])
    except WebSocketDisconnect:
        # Only clear the slot if WE are still the active runtime.
        if _active_runtime is runtime:
            _active_runtime = None
|
||||
|
||||
|
||||
# --- API endpoints (for Claude to inspect runtime state) ---
|
||||
|
||||
import hashlib
|
||||
from asyncio import Queue
|
||||
from starlette.responses import StreamingResponse
|
||||
|
||||
# SSE subscribers (for titan/service accounts to watch live)
|
||||
_sse_subscribers: list[Queue] = []
|
||||
|
||||
def _broadcast_sse(event: dict):
|
||||
"""Push an event to all SSE subscribers."""
|
||||
for q in _sse_subscribers:
|
||||
try:
|
||||
q.put_nowait(event)
|
||||
except asyncio.QueueFull:
|
||||
pass # drop if subscriber is too slow
|
||||
|
||||
def _state_hash() -> str:
    """Hash of current runtime state — cheap way to detect changes."""
    runtime = _active_runtime
    if runtime is None:
        return "no_session"
    snapshot = {
        "mem": runtime.memorizer.state,
        "hlen": len(runtime.history),
    }
    raw = json.dumps(snapshot, sort_keys=True)
    return hashlib.md5(raw.encode()).hexdigest()[:12]
|
||||
|
||||
|
||||
@app.get("/api/events")
async def sse_events(user=Depends(require_auth)):
    """SSE stream of runtime events (trace, state changes)."""
    queue: Queue = Queue(maxsize=100)
    _sse_subscribers.append(queue)

    async def generate():
        try:
            while True:
                item = await queue.get()
                yield f"data: {json.dumps(item)}\n\n"
        except asyncio.CancelledError:
            pass
        finally:
            # Always unsubscribe, even when the client disconnects.
            _sse_subscribers.remove(queue)

    sse_headers = {"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}
    return StreamingResponse(generate(), media_type="text/event-stream", headers=sse_headers)
|
||||
|
||||
|
||||
@app.get("/api/poll")
async def poll(since: str = "", user=Depends(require_auth)):
    """Returns current hash. If 'since' matches, returns {changed: false}. Cheap polling."""
    current = _state_hash()
    if since and since == current:
        return {"changed": False, "hash": current}
    runtime = _active_runtime
    return {
        "changed": True,
        "hash": current,
        "state": runtime.memorizer.state if runtime else None,
        "history_len": len(runtime.history) if runtime else 0,
        "last_messages": runtime.history[-3:] if runtime else [],
    }
|
||||
|
||||
@app.get("/api/state")
async def get_state(user=Depends(require_auth)):
    """Current memorizer state + history length."""
    runtime = _active_runtime
    if runtime is None:
        return {"status": "no_session"}
    return {
        "status": "active",
        "memorizer": runtime.memorizer.state,
        "history_len": len(runtime.history),
    }
|
||||
|
||||
|
||||
@app.get("/api/history")
async def get_history(last: int = 10, user=Depends(require_auth)):
    """Recent conversation history (the most recent `last` messages).

    FIX: `last <= 0` previously sliced as history[-0:] and returned the
    WHOLE history; it now returns no messages, as the parameter implies.
    """
    if not _active_runtime:
        return {"status": "no_session", "messages": []}
    messages = _active_runtime.history[-last:] if last > 0 else []
    return {
        "status": "active",
        "messages": messages,
    }
|
||||
|
||||
|
||||
@app.get("/api/trace")
async def get_trace(last: int = 30, user=Depends(require_auth)):
    """Recent trace lines from trace.jsonl (most recent `last` entries).

    FIX: `last <= 0` previously sliced as lines[-0:] and returned the
    whole file; it now returns no lines. Unparseable lines are skipped.
    """
    if last <= 0 or not TRACE_FILE.exists():
        return {"lines": []}
    lines = TRACE_FILE.read_text(encoding="utf-8").strip().split("\n")
    parsed = []
    for line in lines[-last:]:
        try:
            parsed.append(json.loads(line))
        except json.JSONDecodeError:
            pass  # tolerate partial/corrupt trace lines
    return {"lines": parsed}
|
||||
|
||||
|
||||
# Serve index.html explicitly, then static assets
|
||||
from fastapi.responses import FileResponse
|
||||
|
||||
@app.get("/")
async def index():
    """Serve the SPA shell."""
    spa = STATIC_DIR / "index.html"
    return FileResponse(spa)
|
||||
|
||||
@app.get("/callback")
async def callback():
    """OIDC callback — serves the same SPA, JS handles the code exchange."""
    spa = STATIC_DIR / "index.html"
    return FileResponse(spa)
|
||||
|
||||
# Static assets (JS/CSS) are served under /static.
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
||||
|
||||
|
||||
# Dev entry point with auto-reload (the container runs the Dockerfile CMD).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("agent:app", host="0.0.0.0", port=8000, reload=True)
|
||||
12
docker-compose.yml
Normal file
12
docker-compose.yml
Normal file
@ -0,0 +1,12 @@
|
||||
# Local dev stack for the agent runtime.
services:
  runtime:
    build: .
    ports:
      - "8000:8000"
    volumes:
      # Source + assets mounted over the image copies for live editing.
      - ./agent.py:/app/agent.py
      - ./static:/app/static
      # NOTE(review): if trace.jsonl doesn't exist on the host, Docker
      # creates it as a DIRECTORY — touch the file before first `up`.
      - ./trace.jsonl:/app/trace.jsonl
    env_file:
      - .env
    restart: unless-stopped
|
||||
25
k8s/cog-ingress.yaml
Normal file
25
k8s/cog-ingress.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
# Traefik ingress for the agent runtime at https://cog.loop42.de;
# cert-manager issues the Let's Encrypt certificate into the cog-tls secret.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: cog-runtime
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  ingressClassName: traefik
  tls:
    - secretName: cog-tls
      hosts:
        - cog.loop42.de
  rules:
    - host: cog.loop42.de
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: agent-runtime
                port:
                  number: 80
|
||||
25
k8s/zitadel-login-ingress.yaml
Normal file
25
k8s/zitadel-login-ingress.yaml
Normal file
@ -0,0 +1,25 @@
|
||||
# Routes only the hosted-login UI path (/ui/v2/login) on auth.loop42.de
# to the zitadel-login service; other paths are served by Zitadel itself
# (see the ingress in zitadel-values.yaml).
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: zitadel-login
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  ingressClassName: traefik
  tls:
    - secretName: zitadel-tls
      hosts:
        - auth.loop42.de
  rules:
    - host: auth.loop42.de
      http:
        paths:
          - path: /ui/v2/login
            pathType: Prefix
            backend:
              service:
                name: zitadel-login
                port:
                  number: 3000
|
||||
85
k8s/zitadel-values.yaml
Normal file
85
k8s/zitadel-values.yaml
Normal file
@ -0,0 +1,85 @@
|
||||
# Helm values for the Zitadel chart (auth.loop42.de behind Traefik).
replicaCount: 1

zitadel:
  # Master key is supplied via a pre-created Kubernetes Secret, not inline.
  masterkeySecretName: zitadel-masterkey
  configmapConfig:
    ExternalDomain: auth.loop42.de
    ExternalPort: 443
    ExternalSecure: true
    TLS:
      # TLS terminates at the Traefik ingress; in-cluster traffic is plain.
      Enabled: false
    Database:
      Postgres:
        Host: zitadel-db-postgresql
        Port: 5432
        Database: zitadel
        MaxOpenConns: 10
        MaxIdleConns: 5
        MaxConnLifetime: 30m
        MaxConnIdleTime: 5m
        User:
          Username: zitadel
          SSL:
            Mode: disable
        Admin:
          Username: postgres
          SSL:
            Mode: disable
    FirstInstance:
      Org:
        Name: loop42
        Human:
          UserName: nico
          FirstName: Nico
          LastName: Zimmermann
          NickName: nico
          Email:
            Address: nico@loop42.de
            Verified: true
          # Bootstrap credential only — rotation is forced on first login.
          Password: ChangeMe42!
          PasswordChangeRequired: true

  # NOTE(review): plaintext DB passwords in a committed values file —
  # prefer an existingSecret / --set at deploy time for these.
  secretConfig:
    Database:
      Postgres:
        User:
          Password: zitadel-db-pw-42
        Admin:
          Password: postgres-admin-pw-42

ingress:
  enabled: true
  className: traefik
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
  hosts:
    - host: auth.loop42.de
      paths:
        - path: /
          pathType: Prefix
  tls:
    - secretName: zitadel-tls
      hosts:
        - auth.loop42.de

resources:
  requests:
    cpu: 100m
    memory: 256Mi
  limits:
    cpu: 1000m
    memory: 512Mi

setupJob:
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
    limits:
      cpu: 500m
      memory: 256Mi

# External Postgres (zitadel-db-postgresql) is used; chart-bundled DB off.
postgresql:
  enabled: false
|
||||
8
requirements.txt
Normal file
8
requirements.txt
Normal file
@ -0,0 +1,8 @@
|
||||
# Pinned runtime dependencies for agent.py.
fastapi==0.135.2
uvicorn==0.42.0
httpx==0.28.1
httpx-sse==0.4.3
websockets==16.0
python-dotenv==1.2.2
pydantic==2.12.5
# [crypto] extra pulls `cryptography` — needed for RS256 JWT validation.
PyJWT[crypto]==2.10.1
|
||||
229
static/app.js
Normal file
229
static/app.js
Normal file
@ -0,0 +1,229 @@
|
||||
// DOM handles + session state for the chat/trace UI.
const msgs = document.getElementById('messages');
const inputEl = document.getElementById('input');
const statusEl = document.getElementById('status');
const traceEl = document.getElementById('trace');
let ws, currentEl;  // active WebSocket; assistant bubble currently streaming
let authToken = localStorage.getItem('cog_token');  // id_token (or access_token fallback)
let authConfig = null;  // fetched from /auth/config on boot
|
||||
|
||||
// --- OIDC Auth ---

// Boot sequence: fetch auth config; if on /callback, finish the PKCE code
// exchange; then either connect the WebSocket or show the login button.
async function initAuth() {
  // On fetch failure, assume auth is disabled and connect anonymously.
  try {
    const resp = await fetch('/auth/config');
    authConfig = await resp.json();
  } catch { authConfig = { enabled: false }; }

  if (!authConfig.enabled) { connect(); return; }

  // Handle OIDC callback
  if (location.pathname === '/callback') {
    const params = new URLSearchParams(location.search);
    const code = params.get('code');
    const verifier = sessionStorage.getItem('pkce_verifier');
    if (code && verifier) {
      // Exchange the authorization code + PKCE verifier for tokens.
      const tokenResp = await fetch(authConfig.issuer + '/oauth/v2/token', {
        method: 'POST',
        headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
        body: new URLSearchParams({
          grant_type: 'authorization_code',
          client_id: authConfig.clientId,
          code,
          redirect_uri: location.origin + '/callback',
          code_verifier: verifier,
        }),
      });
      const tokens = await tokenResp.json();
      if (tokens.access_token) {
        // Store access token for userinfo, id_token for JWT validation
        localStorage.setItem('cog_access_token', tokens.access_token);
        authToken = tokens.id_token || tokens.access_token;
        localStorage.setItem('cog_token', authToken);
        sessionStorage.removeItem('pkce_verifier');
      }
    }
    // Drop the code/state query params from the address bar.
    history.replaceState(null, '', '/');
  }

  if (authToken) {
    connect();
  } else {
    showLogin();
  }
}
|
||||
|
||||
// Replace the input bar with a single login button.
function showLogin() {
  statusEl.textContent = 'not authenticated';
  statusEl.style.color = '#f59e0b';
  const loginButton = document.createElement('button');
  loginButton.textContent = 'Log in with loop42';
  loginButton.className = 'login-btn';
  loginButton.onclick = startLogin;
  document.getElementById('input-bar').replaceChildren(loginButton);
}
|
||||
|
||||
// Begin the OIDC authorization-code flow with PKCE (full-page redirect).
async function startLogin() {
  // PKCE: generate code_verifier + code_challenge
  const verifier = randomString(64);
  sessionStorage.setItem('pkce_verifier', verifier);
  const encoder = new TextEncoder();
  const digest = await crypto.subtle.digest('SHA-256', encoder.encode(verifier));
  // base64url-encode the SHA-256 digest (RFC 7636 "S256" method).
  const challenge = btoa(String.fromCharCode(...new Uint8Array(digest)))
    .replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, '');

  const params = new URLSearchParams({
    response_type: 'code',
    client_id: authConfig.clientId,
    redirect_uri: location.origin + '/callback',
    scope: 'openid profile email',
    code_challenge: challenge,
    code_challenge_method: 'S256',
  });
  // Redirect to Zitadel's hosted login page.
  location.href = authConfig.issuer + '/oauth/v2/authorize?' + params;
}
|
||||
|
||||
function randomString(len) {
|
||||
const arr = new Uint8Array(len);
|
||||
crypto.getRandomValues(arr);
|
||||
return btoa(String.fromCharCode(...arr)).replace(/[^a-zA-Z0-9]/g, '').slice(0, len);
|
||||
}
|
||||
|
||||
// --- WebSocket ---
|
||||
|
||||
// Open (or re-open) the WebSocket to the runtime. Tokens travel as query
// parameters because the browser WebSocket API cannot set request headers.
// On close, retries every 2 seconds.
function connect() {
  const scheme = location.protocol === 'https:' ? 'wss:' : 'ws:';
  let endpoint = scheme + '//' + location.host + '/ws';
  if (authToken) {
    const accessToken = localStorage.getItem('cog_access_token') || '';
    endpoint += '?token=' + encodeURIComponent(authToken) + '&access_token=' + encodeURIComponent(accessToken);
  }
  ws = new WebSocket(endpoint);

  ws.onopen = function () {
    statusEl.textContent = 'connected';
    statusEl.style.color = '#22c55e';
    addTrace('runtime', 'connected', 'ws open');
  };

  ws.onclose = function () {
    statusEl.textContent = 'disconnected';
    statusEl.style.color = '#666';
    addTrace('runtime', 'disconnected', 'ws closed');
    // Unconditional reconnect loop — fires even after auth failures.
    setTimeout(connect, 2000);
  };

  ws.onmessage = function (e) {
    const data = JSON.parse(e.data);

    if (data.type === 'hud') {
      handleHud(data);
      return;
    }

    if (data.type === 'delta') {
      // Lazily create the assistant bubble on the first streamed chunk.
      if (!currentEl) {
        currentEl = addMsg('assistant', '');
        currentEl.classList.add('streaming');
      }
      currentEl.textContent += data.content;
      scroll(msgs);
      return;
    }

    if (data.type === 'done') {
      if (currentEl) currentEl.classList.remove('streaming');
      currentEl = null;
    }
  };
}
|
||||
|
||||
// Translate a HUD event from the runtime into a trace-panel line.
// Each known event type gets tailored summary/detail formatting;
// anything else falls back to a raw JSON dump.
function handleHud(data) {
  const node = data.node || 'unknown';
  const event = data.event || '';

  switch (event) {
    case 'context': {
      // Expandable: show message count, click to see full context
      const messages = data.messages || [];
      const summary = messages.length + ' msgs: ' + messages.map(m =>
        m.role[0].toUpperCase() + ':' + truncate(m.content, 30)
      ).join(' | ');
      const detail = messages.map((m, i) =>
        i + ' [' + m.role + '] ' + m.content
      ).join('\n');
      addTrace(node, 'context', summary, 'context', detail);
      break;
    }

    case 'decided':
      addTrace(node, 'decided', data.instruction, 'instruction');
      break;

    case 'updated':
      if (data.state) {
        const pairs = Object.entries(data.state).map(([key, value]) => {
          const rendered = Array.isArray(value) ? value.join(', ') : (value || 'null');
          return key + '=' + truncate(rendered, 25);
        }).join(' ');
        addTrace(node, 'state', pairs, 'state', JSON.stringify(data.state, null, 2));
      } else {
        // 'updated' without a state payload takes the generic path,
        // matching the original if/else-chain semantics.
        addTrace(node, event, '', '', JSON.stringify(data, null, 2));
      }
      break;

    case 'error':
      addTrace(node, 'error', data.detail || '', 'error');
      break;

    case 'thinking':
      addTrace(node, 'thinking', data.detail || '');
      break;

    case 'streaming':
      addTrace(node, 'streaming', '');
      break;

    case 'done':
      addTrace(node, 'done', '');
      break;

    default:
      // Generic fallback
      addTrace(node, event, '', '', JSON.stringify(data, null, 2));
  }
}
|
||||
|
||||
// Append one line to the trace panel. When `detail` is provided the line
// becomes expandable: clicking it toggles a block holding the full payload.
// `cls` adds an extra CSS class to the data span (e.g. 'error', 'state').
function addTrace(node, event, text, cls, detail) {
  const row = document.createElement('div');
  row.className = detail ? 'trace-line expandable' : 'trace-line';

  const stamp = new Date().toLocaleTimeString('de-DE', { hour12: false, hour: '2-digit', minute: '2-digit', second: '2-digit', fractionalSecondDigits: 1 });

  // All dynamic values pass through esc() before entering innerHTML.
  row.innerHTML = [
    '<span class="trace-ts">' + stamp + '</span>',
    '<span class="trace-node ' + esc(node) + '">' + esc(node) + '</span>',
    '<span class="trace-event">' + esc(event) + '</span>',
    '<span class="trace-data' + (cls ? ' ' + cls : '') + '">' + esc(text) + '</span>',
  ].join('');

  traceEl.appendChild(row);

  if (detail) {
    const payload = document.createElement('div');
    payload.className = 'trace-detail';
    payload.textContent = detail;
    traceEl.appendChild(payload);
    row.addEventListener('click', () => payload.classList.toggle('open'));
  }

  scroll(traceEl);
}
|
||||
|
||||
// Pin an element's scroll position to its bottom edge.
function scroll(el) {
  el.scrollTop = el.scrollHeight;
}
|
||||
// HTML-escape arbitrary text by round-tripping it through a detached DOM node.
function esc(s) {
  const node = document.createElement('span');
  node.textContent = s;
  return node.innerHTML;
}
|
||||
// Clip a string to at most n characters, appending an ellipsis when cut.
function truncate(s, n) {
  if (s.length <= n) return s;
  return s.slice(0, n) + '\u2026';
}
|
||||
|
||||
// Append a chat bubble for `role` (e.g. 'user' / 'assistant') containing
// `text`, auto-scroll the chat pane, and return the element so callers can
// keep streaming content into it.
function addMsg(role, text) {
  const bubble = document.createElement('div');
  bubble.className = 'msg ' + role;
  bubble.textContent = text;
  msgs.appendChild(bubble);
  scroll(msgs);
  return bubble;
}
|
||||
|
||||
// Ship the current input box content over the WebSocket. No-op when the
// input is blank or the socket is not open (readyState 1 === OPEN).
function send() {
  const text = inputEl.value.trim();
  const socketOpen = ws && ws.readyState === 1;
  if (!text || !socketOpen) return;

  addMsg('user', text);
  addTrace('runtime', 'user_msg', truncate(text, 60));
  ws.send(JSON.stringify({ text }));
  inputEl.value = '';
}
|
||||
|
||||
// Wire up Enter-to-send, then kick off the auth flow (which connects the
// WebSocket once a token is available).
inputEl.addEventListener('keydown', function (e) {
  if (e.key === 'Enter') send();
});
initAuth();
|
||||
483
static/design.html
Normal file
483
static/design.html
Normal file
@ -0,0 +1,483 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>Cognitive Runtime — Design Exploration</title>
|
||||
<style>
|
||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #d4d4d4; line-height: 1.6; }
|
||||
.page { max-width: 1400px; margin: 0 auto; padding: 2rem; }
|
||||
h1 { color: #f59e0b; font-size: 1.6rem; margin-bottom: 0.5rem; }
|
||||
h2 { color: #60a5fa; font-size: 1.2rem; margin: 2rem 0 0.75rem; border-bottom: 1px solid #222; padding-bottom: 0.3rem; }
|
||||
h3 { color: #34d399; font-size: 1rem; margin: 1.2rem 0 0.5rem; }
|
||||
p, li { font-size: 0.9rem; }
|
||||
ul { padding-left: 1.2rem; margin: 0.3rem 0; }
|
||||
li { margin: 0.2rem 0; }
|
||||
.subtitle { color: #888; font-size: 0.85rem; margin-bottom: 2rem; }
|
||||
code { background: #1a1a2e; padding: 0.1rem 0.4rem; border-radius: 0.2rem; font-size: 0.85rem; color: #a78bfa; }
|
||||
|
||||
/* Three-column sections */
|
||||
.three-col { display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 1rem; margin: 1rem 0; }
|
||||
.card { background: #111; border: 1px solid #222; border-radius: 0.5rem; padding: 1rem; }
|
||||
.card h3 { margin-top: 0; }
|
||||
.card.amber { border-color: #f59e0b33; }
|
||||
.card.blue { border-color: #60a5fa33; }
|
||||
.card.green { border-color: #34d39933; }
|
||||
.card.purple { border-color: #a78bfa33; }
|
||||
.card.red { border-color: #ef444433; }
|
||||
|
||||
/* Graph visualization */
|
||||
.graph { background: #0f0f1a; border: 1px solid #222; border-radius: 0.5rem; padding: 1.5rem; margin: 1rem 0; font-family: monospace; font-size: 0.8rem; white-space: pre; line-height: 1.4; overflow-x: auto; }
|
||||
.graph .sensor { color: #f59e0b; }
|
||||
.graph .controller { color: #60a5fa; }
|
||||
.graph .effector { color: #34d399; }
|
||||
.graph .memory { color: #a78bfa; }
|
||||
.graph .feedback { color: #f472b6; }
|
||||
.graph .arrow { color: #555; }
|
||||
|
||||
/* Test cases table */
|
||||
table { width: 100%; border-collapse: collapse; margin: 1rem 0; font-size: 0.85rem; }
|
||||
th { text-align: left; padding: 0.5rem; background: #1a1a2e; color: #60a5fa; border-bottom: 2px solid #333; }
|
||||
td { padding: 0.5rem; border-bottom: 1px solid #1a1a1a; }
|
||||
tr:hover td { background: #111; }
|
||||
.tag { display: inline-block; padding: 0.1rem 0.4rem; border-radius: 0.2rem; font-size: 0.7rem; font-weight: 600; }
|
||||
.tag.now { background: #22c55e22; color: #22c55e; }
|
||||
.tag.next { background: #f59e0b22; color: #f59e0b; }
|
||||
.tag.later { background: #60a5fa22; color: #60a5fa; }
|
||||
.tag.node { background: #a78bfa22; color: #a78bfa; }
|
||||
|
||||
/* Roadmap */
|
||||
.phase { display: flex; gap: 1rem; align-items: flex-start; margin: 0.75rem 0; padding: 0.75rem; background: #111; border-radius: 0.5rem; border-left: 3px solid #333; }
|
||||
.phase.active { border-left-color: #22c55e; }
|
||||
.phase.planned { border-left-color: #f59e0b; }
|
||||
.phase.future { border-left-color: #60a5fa; }
|
||||
.phase-num { font-size: 1.5rem; font-weight: 700; color: #333; min-width: 2rem; }
|
||||
.phase.active .phase-num { color: #22c55e; }
|
||||
.phase.planned .phase-num { color: #f59e0b; }
|
||||
.phase.future .phase-num { color: #60a5fa; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="page">
|
||||
|
||||
<h1>Cognitive Agent Runtime — Design Exploration</h1>
|
||||
<div class="subtitle">Node graph architecture grounded in cybernetics, Cynefin, actor-network theory, and signal processing</div>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<h2>Theoretical Grounding</h2>
|
||||
|
||||
<div class="three-col">
|
||||
<div class="card amber">
|
||||
<h3>Cybernetics (Wiener, Ashby, Beer)</h3>
|
||||
<p><strong>Core idea:</strong> systems that regulate themselves through feedback loops.</p>
|
||||
<ul>
|
||||
<li><strong>Ashby's Law of Requisite Variety</strong> — the controller must have at least as much variety as the disturbance. One monolithic agent fails because it can't match the variety of all inputs. Specialized nodes CAN.</li>
|
||||
<li><strong>Viable System Model (Beer)</strong> — every viable system has 5 subsystems: Operations (Output), Coordination (Router), Control (Input), Intelligence (Thinker), Policy (human/config). Our graph maps directly.</li>
|
||||
<li><strong>Homeostasis</strong> — the system maintains stability through feedback. The I/O Feedback node IS the homeostatic loop — detecting drift, repeated failures, frustration.</li>
|
||||
<li><strong>Circular causality</strong> — output affects input. The user's next message is shaped by the response. The graph must be a loop, not a pipeline.</li>
|
||||
</ul>
|
||||
<p style="margin-top:0.5rem;color:#f59e0b;font-size:0.8rem;"><strong>Design takeaway:</strong> Every node is either a sensor, controller, or effector. Missing any breaks the feedback loop.</p>
|
||||
</div>
|
||||
|
||||
<div class="card blue">
|
||||
<h3>Cynefin (Snowden)</h3>
|
||||
<p><strong>Core idea:</strong> different problem domains need different response strategies.</p>
|
||||
<ul>
|
||||
<li><strong>Clear</strong> — "what time is it?" → sense-categorize-respond. Input routes directly to Output, no Thinker needed. Fast.</li>
|
||||
<li><strong>Complicated</strong> — "how do I deploy to K3s?" → sense-analyze-respond. Input routes to Thinker with tools. Expert knowledge.</li>
|
||||
<li><strong>Complex</strong> — "should we use microservices?" → probe-sense-respond. Thinker explores, Memorizer tracks evolving understanding. No single right answer.</li>
|
||||
<li><strong>Chaotic</strong> — system is down, user panicking → act-sense-respond. Output responds FIRST (acknowledge), then Input figures out what happened.</li>
|
||||
<li><strong>Confused</strong> — unclear what domain we're in → Input's primary job! Classify before routing.</li>
|
||||
</ul>
|
||||
<p style="margin-top:0.5rem;color:#60a5fa;font-size:0.8rem;"><strong>Design takeaway:</strong> Input node IS the Cynefin classifier. Different domains = different graph paths.</p>
|
||||
</div>
|
||||
|
||||
<div class="card green">
|
||||
<h3>Actor-Network Theory (Latour)</h3>
|
||||
<p><strong>Core idea:</strong> capability emerges from the network, not individual actors.</p>
|
||||
<ul>
|
||||
<li><strong>Actants</strong> — both human and non-human entities have agency. Each node is an actant. The user is an actant. The LLM API is an actant.</li>
|
||||
<li><strong>Translation</strong> — messages change form as they pass through the network. User text → envelope → command → LLM prompt → stream → display. Each node translates.</li>
|
||||
<li><strong>Irreducibility</strong> — you can't reduce the system to one actor. No single node "is" the agent. The GRAPH is the agent.</li>
|
||||
<li><strong>Enrollment</strong> — new nodes join the network as needed. Tool nodes, sensor nodes, human-in-the-loop nodes. The graph grows.</li>
|
||||
</ul>
|
||||
<p style="margin-top:0.5rem;color:#34d399;font-size:0.8rem;"><strong>Design takeaway:</strong> The graph IS the intelligence. Nodes are replaceable. Edges are where meaning happens.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<h2>Node Taxonomy (from signal processing + cybernetics)</h2>
|
||||
|
||||
<div class="three-col">
|
||||
<div class="card amber">
|
||||
<h3>Sensors (perceive)</h3>
|
||||
<ul>
|
||||
<li><strong>Input</strong> — user message sensor</li>
|
||||
<li><strong>Timer</strong> — periodic trigger (cron, polling)</li>
|
||||
<li><strong>Webhook</strong> — external event sensor</li>
|
||||
<li><strong>FileWatch</strong> — filesystem change sensor</li>
|
||||
<li><strong>SystemProbe</strong> — health/load sensor</li>
|
||||
</ul>
|
||||
<p style="color:#888;font-size:0.75rem;margin-top:0.5rem;">Like a webcam node in TouchDesigner. Always on, emits when something happens.</p>
|
||||
</div>
|
||||
|
||||
<div class="card blue">
|
||||
<h3>Controllers (decide + transform)</h3>
|
||||
<ul>
|
||||
<li><strong>Classifier</strong> — categorize input (Cynefin domain, intent, tone)</li>
|
||||
<li><strong>Router</strong> — direct to different paths based on classification</li>
|
||||
<li><strong>Thinker</strong> — deep reasoning, tool use</li>
|
||||
<li><strong>Filter</strong> — reduce, summarize, extract</li>
|
||||
<li><strong>Accumulator</strong> — aggregate over time (topic tracker)</li>
|
||||
</ul>
|
||||
<p style="color:#888;font-size:0.75rem;margin-top:0.5rem;">Like filter/transform nodes in Max/MSP. Shape the signal.</p>
|
||||
</div>
|
||||
|
||||
<div class="card green">
|
||||
<h3>Effectors (act)</h3>
|
||||
<ul>
|
||||
<li><strong>Output</strong> — stream text to user</li>
|
||||
<li><strong>Feedback</strong> — emit HUD/status events</li>
|
||||
<li><strong>ToolExec</strong> — execute external tools</li>
|
||||
<li><strong>Writer</strong> — persist to storage</li>
|
||||
<li><strong>Notifier</strong> — push to external systems</li>
|
||||
</ul>
|
||||
<p style="color:#888;font-size:0.75rem;margin-top:0.5rem;">Like output nodes in Unreal Blueprints. Make something happen in the world.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card purple" style="margin:1rem 0;">
|
||||
<h3>Memory (special: both reads and writes)</h3>
|
||||
<ul>
|
||||
<li><strong>Memorizer</strong> — working memory, session history, user facts (SQLite/JSON)</li>
|
||||
<li><strong>TopicTracker</strong> — maintains list of active conversation topics</li>
|
||||
<li><strong>ContextBuilder</strong> — assembles the right context for each node on demand</li>
|
||||
</ul>
|
||||
<p style="color:#888;font-size:0.75rem;margin-top:0.5rem;">Memory nodes are unique: they're called BY other nodes, not just wired in sequence. They're shared state — the "blackboard" in classic AI.</p>
|
||||
</div>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<h2>Graph Architecture — Full Vision</h2>
|
||||
|
||||
<div class="graph">
|
||||
<span class="arrow"> ┌─────────────────────────────────────────────────┐</span>
|
||||
<span class="arrow"> │ │</span>
|
||||
<span class="arrow"> ▼ │</span>
|
||||
<span class="sensor"> [User WS] ──► [Input/Classifier]</span><span class="arrow"> ──┬──────────────────────────┐ │</span>
|
||||
<span class="arrow"> │ │ │ │</span>
|
||||
<span class="arrow"> │ ┌─────────┘ │ │</span>
|
||||
<span class="arrow"> │ │ </span><span class="feedback">Cynefin routing</span><span class="arrow"> │ │</span>
|
||||
<span class="arrow"> │ │ │ │</span>
|
||||
<span class="arrow"> </span><span class="feedback">Clear:</span><span class="arrow"> │ │ </span><span class="controller">Complicated/Complex:</span><span class="arrow"> │ │</span>
|
||||
<span class="arrow"> (skip │ │ ▼ │</span>
|
||||
<span class="arrow"> thinker) │ ▼ </span><span class="memory">[Memorizer]</span><span class="arrow"> │</span>
|
||||
<span class="arrow"> │ </span><span class="controller">[Thinker]</span><span class="arrow"> ◄── context ──────── </span><span class="memory"> │ </span><span class="arrow"> │</span>
|
||||
<span class="arrow"> │ │ │ </span><span class="memory"> ▲ </span><span class="arrow"> │</span>
|
||||
<span class="arrow"> │ │ └──── memory updates ──────── </span><span class="memory"> │ </span><span class="arrow"> │</span>
|
||||
<span class="arrow"> │ │ │ │ │</span>
|
||||
<span class="arrow"> │ │ └──► </span><span class="effector">[ToolExec]</span><span class="arrow"> ─── results ──► │ │</span>
|
||||
<span class="arrow"> │ │ │</span>
|
||||
<span class="arrow"> ▼ ▼ │</span>
|
||||
<span class="arrow"> </span><span class="effector">[Output]</span><span class="arrow"> ──► [User WS] (stream delta/done) │</span>
|
||||
<span class="arrow"> │ │</span>
|
||||
<span class="arrow"> └──► </span><span class="feedback">[Feedback]</span><span class="arrow"> ──► [User WS] (hud events) │</span>
|
||||
<span class="arrow"> │ │</span>
|
||||
<span class="arrow"> └──► </span><span class="feedback">[I/O Monitor]</span><span class="arrow"> ── hints ──────────┘</span>
|
||||
<span class="arrow"> │</span>
|
||||
<span class="sensor"> [Timer] ──────────────────────────────►│</span>
|
||||
<span class="sensor"> [Webhook] ────────────────────────────►│</span>
|
||||
<span class="sensor"> [SystemProbe] ────────────────────────►│</span>
|
||||
<span class="arrow"> │</span>
|
||||
<span class="arrow"> ▼</span>
|
||||
<span class="arrow"> </span><span class="memory">[TopicTracker]</span><span class="arrow"> ──► [UI: topic list, action buttons]</span>
|
||||
</div>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<h2>Node-Based Programming Analogy</h2>
|
||||
|
||||
<div class="three-col">
|
||||
<div class="card">
|
||||
<h3>TouchDesigner / Max/MSP</h3>
|
||||
<ul>
|
||||
<li>Webcam → filter → skeleton detector → output display</li>
|
||||
<li><strong>Always running</strong> — not request/response</li>
|
||||
<li>Nodes have typed inputs/outputs</li>
|
||||
<li>Graph is the program</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h3>ComfyUI (Stable Diffusion)</h3>
|
||||
<ul>
|
||||
<li>Prompt → CLIP → sampler → VAE → image</li>
|
||||
<li>Each node: one model, one job</li>
|
||||
<li>Swap nodes to change behavior</li>
|
||||
<li>Visual graph = full transparency</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h3>Our Cognitive Runtime</h3>
|
||||
<ul>
|
||||
<li>User msg → classify → think → stream → display</li>
|
||||
<li>Each node: one LLM (or none), one job</li>
|
||||
<li>Swap models per node via config</li>
|
||||
<li><strong>Three-column view = our visual debugger</strong></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p style="color:#f59e0b;margin:1rem 0;">Key insight: like node-based visual programming, the graph runs <strong>continuously</strong>. Sensors fire, signals propagate, effectors act. The chat is just ONE sensor. Timer events, webhooks, system probes — all feed the same graph.</p>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<h2>10 Test Use Cases</h2>
|
||||
|
||||
<table>
|
||||
<tr><th>#</th><th>Use Case</th><th>Tests</th><th>Nodes Needed</th><th>Phase</th></tr>
|
||||
<tr>
|
||||
<td>1</td>
|
||||
<td><strong>Greeting</strong> — "hey!"</td>
|
||||
<td>Input classifies casual, Output responds warmly. Verify command + context visible in panels.</td>
|
||||
<td><span class="tag node">Input</span> <span class="tag node">Output</span></td>
|
||||
<td><span class="tag now">NOW</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>2</td>
|
||||
<td><strong>Technical question</strong> — "how does asyncio.Queue work?"</td>
|
||||
<td>Input classifies knowledge-needed. Output gives detailed answer. Context panel shows history growth.</td>
|
||||
<td><span class="tag node">Input</span> <span class="tag node">Output</span></td>
|
||||
<td><span class="tag now">NOW</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>3</td>
|
||||
<td><strong>Multi-turn follow-up</strong> — ask, then "tell me more"</td>
|
||||
<td>Input sees follow-up pattern. Output uses history for continuity. Watch context grow in both panels.</td>
|
||||
<td><span class="tag node">Input</span> <span class="tag node">Output</span></td>
|
||||
<td><span class="tag now">NOW</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>4</td>
|
||||
<td><strong>Tone shift</strong> — friendly then frustrated "this is broken!"</td>
|
||||
<td>Input detects tone change, adjusts command. Output shifts from casual to empathetic/helpful.</td>
|
||||
<td><span class="tag node">Input</span> <span class="tag node">Output</span></td>
|
||||
<td><span class="tag now">NOW</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>5</td>
|
||||
<td><strong>Memory persistence</strong> — "my name is Nico" ... later ... "what's my name?"</td>
|
||||
<td>Memorizer stores user fact. On later question, provides context to Thinker. Output answers correctly.</td>
|
||||
<td><span class="tag node">Input</span> <span class="tag node">Memorizer</span> <span class="tag node">Output</span></td>
|
||||
<td><span class="tag next">NEXT</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>6</td>
|
||||
<td><strong>Background monitoring</strong> — "watch CPU load, alert if >80%"</td>
|
||||
<td>Timer/SystemProbe sensor fires periodically. Input classifies as monitoring. Feedback emits to UI without chat message.</td>
|
||||
<td><span class="tag node">Timer</span> <span class="tag node">SystemProbe</span> <span class="tag node">Feedback</span></td>
|
||||
<td><span class="tag later">LATER</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>7</td>
|
||||
<td><strong>System unresponsive</strong> — LLM takes 30s+</td>
|
||||
<td>Feedback node shows "thinking..." immediately. Timeout handling. User sees activity, not silence.</td>
|
||||
<td><span class="tag node">Input</span> <span class="tag node">Feedback</span> <span class="tag node">Output</span></td>
|
||||
<td><span class="tag next">NEXT</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>8</td>
|
||||
<td><strong>Cynefin: Clear domain</strong> — "what's 2+2?"</td>
|
||||
<td>Input classifies as Clear, skips Thinker, routes directly to Output. Faster response, cheaper.</td>
|
||||
<td><span class="tag node">Input</span> <span class="tag node">Router</span> <span class="tag node">Output</span></td>
|
||||
<td><span class="tag later">LATER</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>9</td>
|
||||
<td><strong>Topic tracking</strong> — conversation covers 3 topics, user says "back to the first thing"</td>
|
||||
<td>TopicTracker accumulates topics. UI shows topic list. User can click to refocus. Memorizer provides relevant context.</td>
|
||||
<td><span class="tag node">TopicTracker</span> <span class="tag node">Memorizer</span> <span class="tag node">UI</span></td>
|
||||
<td><span class="tag later">LATER</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>10</td>
|
||||
<td><strong>UI action</strong> — button in top bar triggers "summarize this conversation"</td>
|
||||
<td>Non-chat input (button click) enters the graph as a sensor event. Input classifies, routes to Thinker+Memorizer. Output renders summary.</td>
|
||||
<td><span class="tag node">UI Sensor</span> <span class="tag node">Thinker</span> <span class="tag node">Memorizer</span></td>
|
||||
<td><span class="tag later">LATER</span></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<h2>Build Roadmap — What To Validate</h2>
|
||||
|
||||
<div class="phase active">
|
||||
<div class="phase-num">1</div>
|
||||
<div>
|
||||
<h3 style="color:#22c55e;margin:0;">Input + Output (NOW — we're here)</h3>
|
||||
<p><strong>Validates:</strong> Two-node communication works. LLM-to-LLM command passing. Context isolation visible. Streaming through graph.</p>
|
||||
<p><strong>Test cases:</strong> #1-4 (greeting, technical, follow-up, tone shift)</p>
|
||||
<p><strong>Success = </strong> You can see Input's reasoning and Output's execution as separate contexts. The command is meaningful, not just pass-through.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="phase planned">
|
||||
<div class="phase-num">2</div>
|
||||
<div>
|
||||
<h3 style="color:#f59e0b;margin:0;">+ Feedback Node (pure Python, no LLM)</h3>
|
||||
<p><strong>Validates:</strong> Non-LLM nodes in the graph. Real-time status. Parallel event emission (HUD fires while Output streams).</p>
|
||||
<p><strong>Test cases:</strong> #7 (unresponsive system — user sees "thinking..." not silence)</p>
|
||||
<p><strong>Success = </strong> User always sees activity within 200ms. Feedback panel in the UI shows event timeline.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="phase planned">
|
||||
<div class="phase-num">3</div>
|
||||
<div>
|
||||
<h3 style="color:#f59e0b;margin:0;">+ Memorizer Node (LLM: Gemini Flash + SQLite)</h3>
|
||||
<p><strong>Validates:</strong> Persistent state across the graph. Node-to-node context requests (Thinker asks Memorizer for context). Three LLM calls per turn, each with different context.</p>
|
||||
<p><strong>Test cases:</strong> #5 (memory persistence — "what's my name?")</p>
|
||||
<p><strong>Success = </strong> Conversation survives page reload. Memorizer panel shows what it stores vs what it provides.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="phase future">
|
||||
<div class="phase-num">4</div>
|
||||
<div>
|
||||
<h3 style="color:#60a5fa;margin:0;">+ Timer Sensor + SystemProbe (always-on)</h3>
|
||||
<p><strong>Validates:</strong> The graph runs without user input. Sensor-driven processing. Background monitoring. The system is ALIVE, not just reactive.</p>
|
||||
<p><strong>Test cases:</strong> #6 (background CPU monitoring)</p>
|
||||
<p><strong>Success = </strong> UI shows activity without chat. Alerts appear. The graph is a living system, not a request/response pipe.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="phase future">
|
||||
<div class="phase-num">5</div>
|
||||
<div>
|
||||
<h3 style="color:#60a5fa;margin:0;">+ Router + Cynefin Classification</h3>
|
||||
<p><strong>Validates:</strong> Multi-path graph. Input classifies domain, routes differently. Clear = fast path (skip Thinker). Complex = deep path (Thinker + tools + Memorizer). The graph adapts to the problem.</p>
|
||||
<p><strong>Test cases:</strong> #8 (Clear domain — fast path)</p>
|
||||
<p><strong>Success = </strong> Simple questions are 3x faster. Complex questions get deeper treatment. Visible in the graph view.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="phase future">
|
||||
<div class="phase-num">6</div>
|
||||
<div>
|
||||
<h3 style="color:#60a5fa;margin:0;">+ UI Extensions (topic list, action buttons, productivity)</h3>
|
||||
<p><strong>Validates:</strong> The graph doesn't just do chat. Non-chat inputs (buttons, lists) enter the graph. Non-chat outputs (topic sidebar, action bar) exit the graph. Full cybernetic loop with rich UI.</p>
|
||||
<p><strong>Test cases:</strong> #9 (topic tracking), #10 (UI button triggers graph)</p>
|
||||
<p><strong>Success = </strong> The agent is a workspace tool, not just a chatbot.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<h2>Viable System Model (Beer) — The Graph as Organism</h2>
|
||||
|
||||
<div class="three-col">
|
||||
<div class="card green">
|
||||
<h3>System 1 — Operations</h3>
|
||||
<p>The worker nodes doing actual work.</p>
|
||||
<ul>
|
||||
<li><strong>Thinker</strong> — reasoning, tool calls</li>
|
||||
<li><strong>Output</strong> — response generation</li>
|
||||
<li><strong>ToolExec</strong> — external actions</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card blue">
|
||||
<h3>System 2 — Coordination</h3>
|
||||
<p>Prevents conflicts between System 1 nodes.</p>
|
||||
<ul>
|
||||
<li><strong>Router</strong> — sequencing, dedup</li>
|
||||
<li><strong>ContextBuilder</strong> — shared context</li>
|
||||
<li>Message queue ordering</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card amber">
|
||||
<h3>System 3 — Control</h3>
|
||||
<p>Monitors performance, enforces policies.</p>
|
||||
<ul>
|
||||
<li><strong>Feedback</strong> — quality gates</li>
|
||||
<li>Token budget manager</li>
|
||||
<li>Rate limiter, safety filter</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="three-col">
|
||||
<div class="card purple">
|
||||
<h3>System 4 — Intelligence</h3>
|
||||
<p>Looks outward + forward. Adapts.</p>
|
||||
<ul>
|
||||
<li><strong>I/O Monitor</strong> — pattern detection</li>
|
||||
<li>Learns from failures</li>
|
||||
<li>Adapts routing rules over time</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card red">
|
||||
<h3>System 5 — Identity</h3>
|
||||
<p>What the graph IS and won't do.</p>
|
||||
<ul>
|
||||
<li>System prompts, persona</li>
|
||||
<li>Safety boundaries</li>
|
||||
<li><code>graph.md</code> config</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card" style="border-color:#55555533;">
|
||||
<h3>The Human Node</h3>
|
||||
<p>User isn't outside the system — they're an actant (ANT).</p>
|
||||
<ul>
|
||||
<li>Messages = sensor signals</li>
|
||||
<li>Approvals = gate controls</li>
|
||||
<li>Corrections = feedback loops</li>
|
||||
<li>The graph includes the human</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<h2>Design Principles (Synthesis)</h2>
|
||||
|
||||
<div class="three-col">
|
||||
<div class="card amber">
|
||||
<h3>1. Three-Layer Architecture</h3>
|
||||
<p><em>From cybernetics</em></p>
|
||||
<p>Perception (sensors, classifiers) → Decision (routers, controllers) → Action (processors, effectors). Feedback from action back to perception. Missing any layer breaks the loop.</p>
|
||||
</div>
|
||||
<div class="card blue">
|
||||
<h3>2. Invest in Translations</h3>
|
||||
<p><em>From ANT</em></p>
|
||||
<p>Message schemas and inter-node protocols matter MORE than individual node intelligence. A mediocre LLM with excellent routing outperforms a brilliant LLM with bad routing. Capability is emergent from topology.</p>
|
||||
</div>
|
||||
<div class="card green">
|
||||
<h3>3. Mixed Compute</h3>
|
||||
<p><em>From signal processing</em></p>
|
||||
<p>Only Processor nodes need LLMs. Classifiers, routers, filters, accumulators can be lightweight models or pure code. Keeps cost and latency sane. Reserve expensive calls for where reasoning matters.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="three-col">
|
||||
<div class="card purple">
|
||||
<h3>4. Typed Event-Driven Connections</h3>
|
||||
<p><em>From Max/MSP, Blueprints</em></p>
|
||||
<p>Distinguish <strong>trigger</strong> inputs (fire processing) from <strong>context</strong> inputs (available but passive). Type the message wires. Dampen every feedback cycle (max iterations, circuit breakers).</p>
|
||||
</div>
|
||||
<div class="card red">
|
||||
<h3>5. Requisite Variety</h3>
|
||||
<p><em>From Ashby</em></p>
|
||||
<p>Classifier/router layer must distinguish at least as many input types as you have processing strategies. Under-classification = wasted capability. Over-classification = premature complexity.</p>
|
||||
</div>
|
||||
<div class="card" style="border-color:#55555533;">
|
||||
<h3>6. Domain-Aware Routing</h3>
|
||||
<p><em>From Cynefin</em></p>
|
||||
<p>Clear = shallow/fast (skip Thinker). Complicated = specialist path. Complex = parallel probes. Chaotic = hardcoded fallback, act first. Different domains = different graph depths.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ================================================================== -->
|
||||
<h2>The Key Insight</h2>
|
||||
|
||||
<div class="card amber" style="margin:1rem 0;border-width:2px;">
|
||||
<p style="font-size:1rem;color:#f59e0b;font-weight:600;">The graph IS the agent. Not a single LLM with a prompt. Not a chain of API calls. A living, always-on, multi-model network of specialized processors — exactly like a signal processing graph, but for language and reasoning.</p>
|
||||
<p style="margin-top:0.5rem;">Each phase adds a node. Each node validates one architectural claim. If any claim fails, we learn something. If they all hold, we have a new kind of agent runtime.</p>
|
||||
<p style="margin-top:0.5rem;color:#888;">The network is the capability (ANT). The variety must match the disturbance (Ashby). The domain determines the strategy (Cynefin). The organism needs all five systems to be viable (Beer).</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
33
static/index.html
Normal file
33
static/index.html
Normal file
@ -0,0 +1,33 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>Cognitive Agent Runtime</title>
|
||||
<link rel="stylesheet" href="/static/style.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div id="top-bar">
|
||||
<h1>Cognitive Agent Runtime</h1>
|
||||
<div id="status">disconnected</div>
|
||||
</div>
|
||||
|
||||
<div id="main">
|
||||
<div class="panel chat-panel">
|
||||
<div class="panel-header chat-h">Chat</div>
|
||||
<div id="messages"></div>
|
||||
<div id="input-bar">
|
||||
<input id="input" placeholder="Type a message..." autocomplete="off">
|
||||
<button onclick="send()">Send</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="panel">
|
||||
<div class="panel-header trace-h">Trace</div>
|
||||
<div id="trace"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="/static/app.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
56
static/style.css
Normal file
56
static/style.css
Normal file
@ -0,0 +1,56 @@
|
||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0; height: 100vh; display: flex; flex-direction: column; }
|
||||
|
||||
/* Top bar */
|
||||
#top-bar { display: flex; align-items: center; gap: 1rem; padding: 0.4rem 1rem; background: #111; border-bottom: 1px solid #222; }
|
||||
#top-bar h1 { font-size: 0.85rem; font-weight: 600; color: #888; }
|
||||
#status { font-size: 0.75rem; color: #666; }
|
||||
|
||||
/* Two-column layout: chat 1/3 | trace 2/3 */
|
||||
#main { flex: 1; display: grid; grid-template-columns: 1fr 2fr; gap: 1px; background: #222; overflow: hidden; min-height: 0; }
|
||||
|
||||
.panel { background: #0a0a0a; display: flex; flex-direction: column; overflow: hidden; }
|
||||
.panel-header { padding: 0.5rem 0.75rem; font-size: 0.75rem; font-weight: 700; text-transform: uppercase; letter-spacing: 0.05em; border-bottom: 1px solid #222; flex-shrink: 0; }
|
||||
.panel-header.chat-h { color: #60a5fa; background: #0a1628; }
|
||||
.panel-header.trace-h { color: #a78bfa; background: #120a1e; }
|
||||
|
||||
/* Chat panel */
|
||||
.chat-panel { display: flex; flex-direction: column; }
|
||||
#messages { flex: 1; overflow-y: auto; padding: 0.5rem; display: flex; flex-direction: column; gap: 0.4rem; }
|
||||
.msg { max-width: 90%; padding: 0.5rem 0.75rem; border-radius: 0.6rem; line-height: 1.4; white-space: pre-wrap; font-size: 0.9rem; }
|
||||
.msg.user { align-self: flex-end; background: #2563eb; color: white; }
|
||||
.msg.assistant { align-self: flex-start; background: #1e1e1e; border: 1px solid #333; }
|
||||
.msg.assistant.streaming { border-color: #2563eb; }
|
||||
|
||||
/* Input bar */
|
||||
#input-bar { display: flex; gap: 0.5rem; padding: 0.75rem; background: #111; border-top: 1px solid #222; }
|
||||
#input { flex: 1; padding: 0.5rem 0.75rem; background: #1a1a1a; color: #e0e0e0; border: 1px solid #333; border-radius: 0.4rem; font-size: 0.9rem; outline: none; }
|
||||
#input:focus { border-color: #2563eb; }
|
||||
button { padding: 0.5rem 1rem; background: #2563eb; color: white; border: none; border-radius: 0.4rem; cursor: pointer; font-size: 0.9rem; }
|
||||
button:hover { background: #1d4ed8; }
|
||||
|
||||
/* Trace panel */
|
||||
#trace { flex: 1; overflow-y: auto; padding: 0.5rem; font-family: 'JetBrains Mono', 'Cascadia Code', 'Fira Code', monospace; font-size: 0.72rem; line-height: 1.5; }
|
||||
|
||||
.trace-line { padding: 0.15rem 0.4rem; border-bottom: 1px solid #111; display: flex; gap: 0.5rem; align-items: baseline; }
|
||||
.trace-line:hover { background: #1a1a2e; }
|
||||
|
||||
.trace-ts { color: #555; flex-shrink: 0; min-width: 5rem; }
|
||||
.trace-node { font-weight: 700; flex-shrink: 0; min-width: 6rem; }
|
||||
.trace-node.input { color: #f59e0b; }
|
||||
.trace-node.output { color: #34d399; }
|
||||
.trace-node.memorizer { color: #c084fc; }
|
||||
.trace-node.runtime { color: #60a5fa; }
|
||||
|
||||
.trace-event { color: #888; flex-shrink: 0; min-width: 6rem; }
|
||||
|
||||
.trace-data { color: #ccc; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
|
||||
.trace-data.instruction { color: #22c55e; }
|
||||
.trace-data.error { color: #ef4444; }
|
||||
.trace-data.state { color: #c084fc; }
|
||||
.trace-data.context { color: #666; }
|
||||
|
||||
/* Expandable trace detail */
|
||||
.trace-line.expandable { cursor: pointer; }
|
||||
.trace-detail { display: none; padding: 0.3rem 0.4rem 0.3rem 12rem; font-size: 0.65rem; color: #777; white-space: pre-wrap; word-break: break-all; max-height: 10rem; overflow-y: auto; background: #0d0d14; border-bottom: 1px solid #1a1a2e; }
|
||||
.trace-detail.open { display: block; }
|
||||
Loading…
x
Reference in New Issue
Block a user