diff --git a/agent/__init__.py b/agent/__init__.py index 3141200..af08a39 100644 --- a/agent/__init__.py +++ b/agent/__init__.py @@ -14,6 +14,7 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message from fastapi import FastAPI from fastapi.responses import FileResponse from fastapi.staticfiles import StaticFiles +from starlette.responses import Response from .api import register_routes @@ -27,7 +28,9 @@ register_routes(app) # Serve index.html explicitly, then static assets @app.get("/") async def index(): - return FileResponse(STATIC_DIR / "index.html") + resp = FileResponse(STATIC_DIR / "index.html") + resp.headers["Cache-Control"] = "no-cache" + return resp @app.get("/callback") async def callback(): diff --git a/agent/nodes/output.py b/agent/nodes/output.py index 517ae53..2ecc766 100644 --- a/agent/nodes/output.py +++ b/agent/nodes/output.py @@ -51,8 +51,8 @@ YOUR JOB: Transform the Thinker's reasoning into a natural, human-readable text thinker_ctx = f"Thinker response: {thought.response}" if thought.tool_used: thinker_ctx += f"\n\nTool used: {thought.tool_used}\nTool output:\n{thought.tool_output}" - if thought.controls: - thinker_ctx += f"\n\n(UI controls were also sent to the user: {len(thought.controls)} elements)" + if thought.actions: + thinker_ctx += f"\n\n(UI buttons shown to user: {', '.join(a.get('label','') for a in thought.actions)})" messages.append({"role": "system", "content": thinker_ctx}) messages = self.trim_context(messages) diff --git a/agent/nodes/sensor.py b/agent/nodes/sensor.py index 6ac65b0..fb647b2 100644 --- a/agent/nodes/sensor.py +++ b/agent/nodes/sensor.py @@ -3,13 +3,14 @@ import asyncio import logging import time -from datetime import datetime, timezone, timedelta +from datetime import datetime +from zoneinfo import ZoneInfo from .base import Node log = logging.getLogger("runtime") -BERLIN = timezone(timedelta(hours=2)) # CEST +BERLIN = ZoneInfo("Europe/Berlin") class SensorNode(Node): diff --git 
a/agent/nodes/thinker.py b/agent/nodes/thinker.py index 1c7fd04..df71b4b 100644 --- a/agent/nodes/thinker.py +++ b/agent/nodes/thinker.py @@ -24,7 +24,28 @@ TOOLS — write a ```python code block and it WILL be executed. Use print() for - For math, databases, file ops, any computation: write python. NEVER describe code — write it. - For simple conversation: respond directly as text. -A separate UI node handles all visual controls (buttons, tables). Just focus on reasoning and content. +ACTIONS — ALWAYS end your response with an ACTIONS: line containing a JSON array. +The ACTIONS line MUST be the very last line of your response. + +Format: ACTIONS: [json array of actions] + +Examples: + User asks about dog breeds: + Here are three popular dog breeds: Golden Retriever, German Shepherd, and Poodle. + ACTIONS: [{{"label": "Golden Retriever", "action": "learn_breed", "payload": {{"breed": "Golden Retriever"}}}}, {{"label": "German Shepherd", "action": "learn_breed", "payload": {{"breed": "German Shepherd"}}}}, {{"label": "Poodle", "action": "learn_breed", "payload": {{"breed": "Poodle"}}}}] + + User asks what time it is: + Es ist 14:30 Uhr. + ACTIONS: [] + + After creating a database: + Done! Created 5 customers in the database. + ACTIONS: [{{"label": "Show All", "action": "show_all"}}, {{"label": "Add Customer", "action": "add_customer"}}] + +Rules: +- ALWAYS include the ACTIONS: line, even if empty: ACTIONS: [] +- Keep labels short (2-4 words), action is snake_case. +- Only include meaningful actions — empty array is fine for simple chat. {memory_context}""" @@ -89,6 +110,24 @@ conn.close()''' text = re.sub(r'```(?:python|py|sql|sqlite|sh|bash|tool_code).*?```', '', response, flags=re.DOTALL) return text.strip() + def _parse_actions(self, response: str) -> tuple[str, list[dict]]: + """Extract ACTIONS: JSON line from response. 
Returns (clean_text, actions).""" + actions = [] + lines = response.split("\n") + clean_lines = [] + for line in lines: + stripped = line.strip() + if stripped.startswith("ACTIONS:"): + try: + actions = json.loads(stripped[8:].strip()) + if not isinstance(actions, list): + actions = [] + except json.JSONDecodeError: + pass + else: + clean_lines.append(line) + return "\n".join(clean_lines).strip(), actions + async def process(self, command: Command, history: list[dict], memory_context: str = "") -> ThoughtResult: await self.hud("thinking", detail="reasoning about response") @@ -120,18 +159,26 @@ conn.close()''' log.info(f"[thinker] tool output: {tool_output[:200]}") - # Second call: interpret tool output + # Second call: interpret tool output + suggest actions messages.append({"role": "assistant", "content": response}) messages.append({"role": "system", "content": f"Tool output:\n{tool_output}"}) - messages.append({"role": "user", "content": "Respond to the user based on the tool output. Be natural and concise."}) + messages.append({"role": "user", "content": "Respond to the user based on the tool output. Be natural and concise. 
End with ACTIONS: [json array] on the last line (empty array if no actions)."}) messages = self.trim_context(messages) final = await llm_call(self.model, messages) + if not final: + final = "[no response from LLM]" clean_text = self._strip_code_blocks(final) + clean_text, actions = self._parse_actions(clean_text) + if actions: + log.info(f"[thinker] actions: {actions}") await self.hud("decided", instruction=clean_text[:200]) return ThoughtResult(response=clean_text, tool_used=tool_name, - tool_output=tool_output) + tool_output=tool_output, actions=actions) clean_text = self._strip_code_blocks(response) or response + clean_text, actions = self._parse_actions(clean_text) + if actions: + log.info(f"[thinker] actions: {actions}") await self.hud("decided", instruction="direct response (no tools)") - return ThoughtResult(response=clean_text) + return ThoughtResult(response=clean_text, actions=actions) diff --git a/agent/nodes/ui.py b/agent/nodes/ui.py index 5b089ac..3fce9ff 100644 --- a/agent/nodes/ui.py +++ b/agent/nodes/ui.py @@ -1,10 +1,10 @@ -"""UI Node: renders interactive elements to the awareness panel workspace.""" +"""UI Node: pure renderer — converts ThoughtResult actions + data into controls.""" import json import logging +import re from .base import Node -from ..llm import llm_call from ..types import ThoughtResult log = logging.getLogger("runtime") @@ -12,91 +12,87 @@ log = logging.getLogger("runtime") class UINode(Node): name = "ui" - model = "google/gemini-2.0-flash-001" - max_context_tokens = 3000 - - SYSTEM = """You are the UI node of a cognitive agent runtime. - -You render interactive elements to a workspace panel in the browser. A separate Output node handles all text — you NEVER write prose, explanations, or messages. - -YOUR OUTPUT: A JSON array of UI elements, or [] if nothing to show. 
- -ELEMENT TYPES: - -label — display a value: - {{"type": "label", "id": "unique_id", "text": "Label Text", "value": "current value"}} - -button — clickable action: - {{"type": "button", "label": "Short Label", "action": "action_name", "payload": {{"key": "value"}}}} - -table — structured data: - {{"type": "table", "columns": ["col1", "col2"], "data": [{{"col1": "val", "col2": "val"}}]}} - -RULES: -- Output ONLY a valid JSON array. No text, no markdown, no explanation. -- Labels: show key values the user asked about or that resulted from tool execution. -- Buttons: offer clear follow-up actions. Keep labels 2-4 words. Action is snake_case. -- Tables: when tool output contains structured/tabular data. -- Return [] when the response is purely conversational with no actionable data. -- Every element you emit REPLACES the entire workspace. Include all elements that should be visible. - -CURRENT WORKSPACE: -{current_controls}""" + # No model — pure code, no LLM calls def __init__(self, send_hud): super().__init__(send_hud) self.current_controls: list[dict] = [] + def _extract_table(self, tool_output: str) -> dict | None: + """Try to parse tabular data from tool output.""" + if not tool_output: + return None + lines = [l.strip() for l in tool_output.strip().split("\n") if l.strip()] + if len(lines) < 2: + return None + + # Detect pipe-separated tables (e.g. 
"col1 | col2\nval1 | val2") + if " | " in lines[0]: + columns = [c.strip() for c in lines[0].split(" | ")] + data = [] + for line in lines[1:]: + if line.startswith("-") or line.startswith("="): + continue # separator line + vals = [v.strip() for v in line.split(" | ")] + if len(vals) == len(columns): + data.append(dict(zip(columns, vals))) + if data: + return {"type": "table", "columns": columns, "data": data} + + # Detect "Table: X" header format from sqlite wrapper + if lines[0].startswith("Table:"): + table_name = lines[0].replace("Table:", "").strip() + if len(lines) >= 2 and " | " in lines[1]: + columns = [c.strip() for c in lines[1].split(" | ")] + data = [] + for line in lines[2:]: + vals = [v.strip() for v in line.split(" | ")] + if len(vals) == len(columns): + data.append(dict(zip(columns, vals))) + if data: + return {"type": "table", "columns": columns, "data": data} + + return None + async def process(self, thought: ThoughtResult, history: list[dict], memory_context: str = "") -> list[dict]: - await self.hud("thinking", detail="deciding UI controls") + controls = [] - # Show UI what's currently rendered - if self.current_controls: - ctrl_desc = json.dumps(self.current_controls, indent=2) - else: - ctrl_desc = "(empty)" + # 1. Render actions from Thinker as buttons + for action in thought.actions: + controls.append({ + "type": "button", + "label": action.get("label", "Action"), + "action": action.get("action", "unknown"), + "payload": action.get("payload", {}), + }) - messages = [ - {"role": "system", "content": self.SYSTEM.format(current_controls=ctrl_desc)}, - ] + # 2. 
Extract tables from tool output + if thought.tool_output: + table = self._extract_table(thought.tool_output) + if table: + controls.append(table) - for msg in history[-6:]: - messages.append(msg) - - ctx = f"Thinker response: {thought.response}" - if thought.tool_used: - ctx += f"\n\nTool: {thought.tool_used}\nTool output:\n{thought.tool_output}" - messages.append({"role": "system", "content": ctx}) - messages.append({"role": "user", "content": "What UI elements should the workspace show now? Return JSON array."}) - - messages = self.trim_context(messages) - await self.hud("context", messages=messages, tokens=self.last_context_tokens, - max_tokens=self.max_context_tokens, fill_pct=self.context_fill_pct) - - raw = await llm_call(self.model, messages) - log.info(f"[ui] raw: {raw[:200]}") - - text = raw.strip() - if text.startswith("```"): - text = text.split("\n", 1)[1] if "\n" in text else text[3:] - if text.endswith("```"): - text = text[:-3] - text = text.strip() - - try: - controls = json.loads(text) - if not isinstance(controls, list): - controls = [] - except (json.JSONDecodeError, Exception) as e: - log.error(f"[ui] parse error: {e}, raw: {text[:200]}") - controls = [] + # 3. 
Add labels for key tool results (single-value outputs) + if thought.tool_used and thought.tool_output and not any(c["type"] == "table" for c in controls): + output = thought.tool_output.strip() + # Short single-line output → label + if "\n" not in output and len(output) < 100: + controls.append({ + "type": "label", + "id": "tool_result", + "text": thought.tool_used, + "value": output, + }) if controls: self.current_controls = controls await self.hud("controls", controls=controls) log.info(f"[ui] emitting {len(controls)} controls") else: - await self.hud("decided", instruction="no controls needed") + if self.current_controls: + # Keep previous controls visible + controls = self.current_controls + await self.hud("decided", instruction="no new controls") return controls diff --git a/agent/types.py b/agent/types.py index ec52721..04ae09a 100644 --- a/agent/types.py +++ b/agent/types.py @@ -26,4 +26,4 @@ class ThoughtResult: response: str tool_used: str = "" tool_output: str = "" - controls: list = field(default_factory=list) + actions: list = field(default_factory=list) # [{label, action, payload?}] diff --git a/static/app.js b/static/app.js index dbe2c25..53b98af 100644 --- a/static/app.js +++ b/static/app.js @@ -90,7 +90,10 @@ function randomString(len) { // --- WebSocket --- +let _authFailed = false; + function connect() { + if (_authFailed) return; const proto = location.protocol === 'https:' ? 
'wss:' : 'ws:'; let wsUrl = proto + '//' + location.host + '/ws'; if (authToken) { @@ -105,7 +108,21 @@ function connect() { addTrace('runtime', 'connected', 'ws open'); }; - ws.onclose = () => { + ws.onerror = () => {}; // swallow — onclose handles it + + ws.onclose = (e) => { + // 4001 = explicit auth rejection, 1006 = HTTP 403 before upgrade + if (e.code === 4001 || e.code === 1006) { + _authFailed = true; + localStorage.removeItem('cog_token'); + localStorage.removeItem('cog_access_token'); + authToken = null; + statusEl.textContent = 'session expired'; + statusEl.style.color = '#ef4444'; + addTrace('runtime', 'auth expired', 'please log in again'); + showLogin(); + return; + } statusEl.textContent = 'disconnected'; statusEl.style.color = '#666'; addTrace('runtime', 'disconnected', 'ws closed'); @@ -135,7 +152,6 @@ function connect() { currentEl = null; } else if (data.type === 'controls') { - renderControls(data.controls); dockControls(data.controls); } }; @@ -178,12 +194,10 @@ function handleHud(data) { } else if (event === 'process_start') { addTrace(node, 'run ' + (data.tool || 'python'), truncate(data.code || '', 80), 'instruction', data.code); - showProcessCard(data.pid, data.tool || 'python', data.code || ''); showAwarenessProcess(data.pid, data.tool || 'python', data.code || ''); } else if (event === 'process_done') { addTrace(node, (data.exit_code === 0 ? 'done' : 'failed'), truncate(data.output || '', 80), data.exit_code === 0 ? '' : 'error', data.output); - updateProcessCard(data.pid, data.exit_code === 0 ? 'done' : 'failed', data.output || '', data.elapsed); updateAwarenessProcess(data.pid, data.exit_code === 0 ? 
'done' : 'failed', data.output || '', data.elapsed); } else if (event === 'error') { @@ -484,14 +498,13 @@ function updateAwarenessProcess(pid, status, output, elapsed) { if (stop) stop.remove(); const out = el.querySelector('.aw-proc-output'); if (out && output) out.textContent = output; - // Auto-remove done processes after 10s - if (status === 'done') { - setTimeout(() => { - el.remove(); - const body = document.getElementById('aw-proc-body'); - if (body && !body.children.length) body.innerHTML = 'idle'; - }, 10000); - } + // Auto-remove completed processes (done: 10s, failed: 30s) + const delay = status === 'done' ? 10000 : 30000; + setTimeout(() => { + el.remove(); + const body = document.getElementById('aw-proc-body'); + if (body && !body.children.length) body.innerHTML = 'idle'; + }, delay); } function dockControls(controls) {