diff --git a/agent/nodes/base.py b/agent/nodes/base.py index c7e57c1..c416330 100644 --- a/agent/nodes/base.py +++ b/agent/nodes/base.py @@ -18,6 +18,9 @@ class Node: self.context_fill_pct = 0 async def hud(self, event: str, **data): + # Always include model on context events so frontend knows what model each node uses + if event == "context" and self.model: + data["model"] = self.model await self.send_hud({"node": self.name, "event": event, **data}) def trim_context(self, messages: list[dict]) -> list[dict]: diff --git a/agent/nodes/eras_expert.py b/agent/nodes/eras_expert.py index dbf5316..e9523c8 100644 --- a/agent/nodes/eras_expert.py +++ b/agent/nodes/eras_expert.py @@ -1,4 +1,8 @@ -"""Eras Expert: heating/energy customer database specialist.""" +"""Eras Expert: heating cost billing domain specialist. + +Eras is a German software company for Heizkostenabrechnung (heating cost billing). +Users are Hausverwaltungen and Messdienste who manage properties, meters, and billings. +""" import asyncio import logging @@ -13,52 +17,97 @@ class ErasExpertNode(ExpertNode): name = "eras_expert" default_database = "eras2_production" - DOMAIN_SYSTEM = """You are the Eras expert — specialist for heating and energy customer data. -You work with the eras2_production database containing customer, device, and billing data. -All table and column names are German (lowercase). Common queries involve customer lookups, -device counts, consumption analysis, and billing reports.""" + DOMAIN_SYSTEM = """You are the Eras domain expert — specialist for heating cost billing (Heizkostenabrechnung). + +BUSINESS CONTEXT: +Eras is a German software company. The software manages Heizkostenabrechnung according to German law (HeizKV). +The USER of this software is a Hausverwaltung (property management) or Messdienst (metering service). +They use Eras to manage their customers' properties, meters, consumption readings, and billings. + +DOMAIN MODEL (how the data relates): +- Kunden (customers) = the Hausverwaltungen or property managers that the Eras user serves + Each Kunde has a Kundennummer and contact data (Name, Adresse, etc.) + +- Objekte (properties/buildings/Liegenschaften) = physical buildings managed by a Kunde + A Kunde can have many Objekte. Each Objekt has an address and is linked to a Kunde. + +- Nutzeinheiten (usage units/apartments) = individual units within an Objekt + An Objekt contains multiple Nutzeinheiten (e.g., Wohnung 1, Wohnung 2). + Each Nutzeinheit has Nutzer (tenants/occupants). + +- Geraete (devices/meters) = measurement devices installed in Nutzeinheiten + Heizkostenverteiler, Waermezaehler, Wasserzaehler, etc. + Each Geraet is linked to a Nutzeinheit and has a Geraetetyp. + +- Geraeteverbraeuche (consumption readings) = measured values from Geraete + Ablesewerte collected by Monteure or remote reading systems. + +- Abrechnungen (billings) = Heizkostenabrechnungen generated per Objekt/period + The core output: distributes heating costs to Nutzeinheiten based on consumption. + +- Auftraege (work orders) = tasks for Monteure (technicians) + Device installation, reading collection, maintenance. + +HIERARCHY: Kunde → Objekte → Nutzeinheiten → Geraete → Verbraeuche + → Nutzer + Kunde → Abrechnungen + Kunde → Auftraege + +IMPORTANT NOTES: +- All table/column names are German, lowercase +- Foreign keys often use patterns like KundenID, ObjektID, NutzeinheitID +- The database is eras2_production +- Always DESCRIBE tables before writing JOINs to verify actual column names +- Common user questions: customer overview, device counts, billing status, Objekt details""" SCHEMA = """Known tables (eras2_production): -- kunden — customers -- objekte — properties/objects linked to customers -- nutzeinheit — usage units within objects -- geraete — devices/meters -- geraeteverbraeuche — device consumption readings -- abrechnungen — billing records +- kunden — customers (Hausverwaltungen) +- objekte — properties/buildings (Liegenschaften) +- nutzeinheit — apartments/units within Objekte +- nutzer — tenants/occupants of Nutzeinheiten +- geraete — measurement devices (Heizkostenverteiler, etc.) +- geraeteverbraeuche — consumption readings +- abrechnungen — heating cost billings +- auftraege — work orders for Monteure +- auftragspositionen — line items within Auftraege +- geraetetypen — device type catalog +- geraetekatalog — device model catalog +- heizbetriebskosten — heating operation costs +- nebenkosten — additional costs (Nebenkosten) -CRITICAL: You do NOT know the exact column names. They are German and unpredictable. -Your FIRST tool_sequence step for ANY SELECT query MUST be DESCRIBE on the target table. -Then use the actual column names from the DESCRIBE result in your SELECT. +KNOWN PRIMARY KEYS AND FOREIGN KEYS: +- kunden: PK = Kundennummer (int), name columns: Name1, Name2, Name3 +- objekte: PK = ObjektID, FK = KundenID → kunden.Kundennummer +- nutzeinheit: FK = ObjektID → objekte.ObjektID +- geraete: FK = NutzeinheitID → nutzeinheit.NutzeinheitID (verify with DESCRIBE) -Example tool_sequence for "show me 5 customers": +IMPORTANT: Always DESCRIBE tables you haven't seen before to verify column names. +Use the FK mappings above for JOINs. Do NOT guess — use exact column names. + +Example for "how many Objekte per Kunde": [ - {{"tool": "query_db", "args": {{"query": "DESCRIBE kunden", "database": "eras2_production"}}}}, - {{"tool": "query_db", "args": {{"query": "SELECT * FROM kunden LIMIT 5", "database": "eras2_production"}}}} + {{"tool": "query_db", "args": {{"query": "SELECT k.Kundennummer, k.Name1, COUNT(o.ObjektID) as AnzahlObjekte FROM kunden k LEFT JOIN objekte o ON o.KundenID = k.Kundennummer GROUP BY k.Kundennummer, k.Name1 ORDER BY AnzahlObjekte DESC LIMIT 20", "database": "eras2_production"}}}} ]""" def __init__(self, send_hud, process_manager=None): super().__init__(send_hud, process_manager) - self._schema_cache: dict[str, str] = {} # table_name -> DESCRIBE result + self._schema_cache: dict[str, str] = {} async def execute(self, job: str, language: str = "de"): """Execute with schema auto-discovery. Caches DESCRIBE results.""" - # Inject cached schema into the job context if self._schema_cache: schema_ctx = "Known column names from previous DESCRIBE:\n" for table, desc in self._schema_cache.items(): - # Just first 5 lines to keep it compact - lines = desc.strip().split("\n")[:6] + lines = desc.strip().split("\n")[:8] schema_ctx += f"\n{table}:\n" + "\n".join(lines) + "\n" job = job + "\n\n" + schema_ctx result = await super().execute(job, language) - # Cache any DESCRIBE results from this execution - # Parse from tool_output if it looks like a DESCRIBE result + # Cache DESCRIBE results if result.tool_output and "Field\t" in result.tool_output: - # Try to identify which table was described - for table in ["kunden", "objekte", "nutzeinheit", "geraete", - "geraeteverbraeuche", "abrechnungen"]: + for table in ["kunden", "objekte", "nutzeinheit", "nutzer", "geraete", + "geraeteverbraeuche", "abrechnungen", "auftraege"]: if table in job.lower() or table in result.tool_output.lower(): self._schema_cache[table] = result.tool_output log.info(f"[eras] cached schema for {table}") diff --git a/agent/nodes/expert_base.py b/agent/nodes/expert_base.py index df88f21..1a43d2f 100644 --- a/agent/nodes/expert_base.py +++ b/agent/nodes/expert_base.py @@ -77,22 +77,67 @@ Write a concise, natural response. 1-3 sentences. super().__init__(send_hud) async def execute(self, job: str, language: str = "de") -> ThoughtResult: - """Execute a self-contained job. Returns ThoughtResult.""" + """Execute a self-contained job. Returns ThoughtResult. + Uses iterative plan-execute: if DESCRIBE queries are in the plan, + execute them first, inject results into a re-plan, then execute the rest.""" await self.hud("thinking", detail=f"planning: {job[:80]}") # Step 1: Plan tool sequence + schema_context = self.SCHEMA plan_messages = [ {"role": "system", "content": self.PLAN_SYSTEM.format( - domain=self.DOMAIN_SYSTEM, schema=self.SCHEMA, + domain=self.DOMAIN_SYSTEM, schema=schema_context, database=self.default_database)}, {"role": "user", "content": f"Job: {job}"}, ] plan_raw = await llm_call(self.model, plan_messages) tool_sequence, response_hint = self._parse_plan(plan_raw) + # Step 1b: Execute DESCRIBE queries first, then re-plan with actual schema + describe_results = {} + remaining_tools = [] + for step in tool_sequence: + if step.get("tool") == "query_db": + query = step.get("args", {}).get("query", "").strip().upper() + if query.startswith("DESCRIBE") or query.startswith("SHOW"): + await self.hud("tool_call", tool="query_db", args=step.get("args", {})) + try: + result = await asyncio.to_thread( + run_db_query, step["args"]["query"], + step["args"].get("database", self.default_database)) + describe_results[step["args"]["query"]] = result + await self.hud("tool_result", tool="query_db", output=result[:200]) + except Exception as e: + await self.hud("tool_result", tool="query_db", output=str(e)[:200]) + else: + remaining_tools.append(step) + else: + remaining_tools.append(step) + + # Re-plan if we got DESCRIBE results (now we know actual column names) + if describe_results: + schema_update = "Actual column names from DESCRIBE:\n" + for q, result in describe_results.items(): + schema_update += f"\n{q}:\n{result[:500]}\n" + + replan_messages = [ + {"role": "system", "content": self.PLAN_SYSTEM.format( + domain=self.DOMAIN_SYSTEM, + schema=schema_context + "\n\n" + schema_update, + database=self.default_database)}, + {"role": "user", "content": f"Job: {job}\n\nUse ONLY the actual column names from DESCRIBE above. Do NOT include DESCRIBE steps — they are already done."}, + ] + replan_raw = await llm_call(self.model, replan_messages) + new_tools, new_hint = self._parse_plan(replan_raw) + if new_tools: + remaining_tools = new_tools + if new_hint: + response_hint = new_hint + + tool_sequence = remaining_tools await self.hud("planned", tools=len(tool_sequence), hint=response_hint[:80]) - # Step 2: Execute tools + # Step 2: Execute remaining tools actions = [] state_updates = {} display_items = [] diff --git a/agent/nodes/pa_v1.py b/agent/nodes/pa_v1.py index 0007cba..5b3cb7f 100644 --- a/agent/nodes/pa_v1.py +++ b/agent/nodes/pa_v1.py @@ -57,7 +57,7 @@ Rules: {memory_context}""" EXPERT_DESCRIPTIONS = { - "eras": "eras — heating/energy domain. Database: eras2_production (customers, devices, billing, consumption). Can also build dashboard UI (buttons, machines, counters, tables) for energy data workflows.", + "eras": "eras — Heizkostenabrechnung (German heating cost billing). Users are Hausverwaltungen managing Kunden, Objekte (buildings), Nutzeinheiten (apartments), Geraete (meters), Verbraeuche (readings), Abrechnungen (billings), Auftraege (work orders). Hierarchy: Kunde > Objekte > Nutzeinheiten > Geraete > Verbraeuche. Database: eras2_production. Can also build dashboard UI.", "plankiste": "plankiste — Kita planning domain. Database: plankiste_test (children, care schedules, offers, pricing). Can build dashboard UI for education workflows and generate Angebote.", } diff --git a/agent/runtime.py b/agent/runtime.py index eab77d5..1a22046 100644 --- a/agent/runtime.py +++ b/agent/runtime.py @@ -17,7 +17,7 @@ log = logging.getLogger("runtime") TRACE_FILE = Path(__file__).parent.parent / "trace.jsonl" # Default graph — can be switched at runtime -_active_graph_name = "v1-current" +_active_graph_name = "v4-eras" class OutputSink: diff --git a/static/app.js b/static/app.js index 9be4e35..1d3a6cc 100644 --- a/static/app.js +++ b/static/app.js @@ -953,6 +953,24 @@ function send() { inputEl.value = ''; } +async function clearSession() { + try { + const headers = { 'Content-Type': 'application/json' }; + if (authToken) headers['Authorization'] = 'Bearer ' + authToken; + await fetch('/api/clear', { method: 'POST', headers }); + // Clear UI + msgs.innerHTML = ''; + traceEl.innerHTML = ''; + _currentDashboard = []; + currentEl = null; + const dock = document.getElementById('dock'); + if (dock) dock.innerHTML = ''; + addTrace('runtime', 'cleared', 'session reset'); + } catch (e) { + addTrace('runtime', 'error', 'clear failed: ' + e); + } +} + // --- Awareness panel updates --- let _sensorReadings = {}; diff --git a/static/index.html b/static/index.html index 97df0c5..6636cdd 100644 --- a/static/index.html +++ b/static/index.html @@ -16,6 +16,8 @@

cog

+
+
disconnected
@@ -27,17 +29,7 @@
Nodes
-
-
input
-
director
-
PA
-
thinker
-
eras
-
output
-
memo
-
interp
-
sensor
-
+
Graph @@ -58,7 +50,6 @@
-
diff --git a/static/js/awareness.js b/static/js/awareness.js index 736b371..4e08f63 100644 --- a/static/js/awareness.js +++ b/static/js/awareness.js @@ -1,9 +1,133 @@ -/** Awareness panel: memorizer state, sensor readings, node meters. */ +/** Awareness panel: memorizer state, sensor readings. + * Node detail panel: per-node model, tokens, progress, last event. + */ import { esc, truncate } from './util.js'; let _sensorReadings = {}; +// --- Node state tracker --- +const _nodeState = {}; // { nodeName: { model, tokens, maxTokens, fillPct, lastEvent, lastDetail, status, toolCalls, startedAt } } + +function _getNode(name) { + if (!_nodeState[name]) { + _nodeState[name] = { + model: '', tokens: 0, maxTokens: 0, fillPct: 0, + lastEvent: '', lastDetail: '', status: 'idle', + toolCalls: 0, lastTool: '', + }; + } + return _nodeState[name]; +} + +export function updateNodeFromHud(node, event, data) { + const n = _getNode(node); + + if (event === 'context') { + if (data.model) n.model = data.model.replace('google/', '').replace('anthropic/', ''); + if (data.tokens !== undefined) n.tokens = data.tokens; + if (data.max_tokens !== undefined) n.maxTokens = data.max_tokens; + if (data.fill_pct !== undefined) n.fillPct = data.fill_pct; + } + + if (event === 'thinking') { + n.status = 'thinking'; + n.lastEvent = 'thinking'; + n.lastDetail = data.detail || ''; + } else if (event === 'perceived') { + n.status = 'done'; + n.lastEvent = 'perceived'; + const a = data.analysis || {}; + n.lastDetail = `${a.intent || '?'}/${a.language || '?'}/${a.tone || '?'}`; + } else if (event === 'decided' || event === 'routed') { + n.status = 'done'; + n.lastEvent = event; + n.lastDetail = data.goal || data.instruction || data.job || ''; + } else if (event === 'tool_call') { + n.status = 'tool'; + n.lastEvent = 'tool_call'; + n.lastTool = data.tool || ''; + n.lastDetail = data.tool || ''; + n.toolCalls++; + } else if (event === 'tool_result') { + n.lastEvent = 'tool_result'; + n.lastDetail = truncate(data.output || '', 50); + } else if (event === 'streaming') { + n.status = 'streaming'; + n.lastEvent = 'streaming'; + } else if (event === 'done') { + n.status = 'done'; + n.lastEvent = 'done'; + } else if (event === 'updated') { + n.status = 'done'; + n.lastEvent = 'updated'; + } else if (event === 'planned') { + n.status = 'planned'; + n.lastEvent = 'planned'; + n.lastDetail = `${data.tools || 0} tools`; + } else if (event === 'interpreted') { + n.status = 'done'; + n.lastEvent = 'interpreted'; + n.lastDetail = truncate(data.summary || '', 50); + } + + renderNodes(); +} + +function renderNodes() { + const el = document.getElementById('node-metrics'); + if (!el) { console.warn('[nodes] #node-metrics not found'); return; } + + // Sort: active nodes first, then by name + const statusOrder = { thinking: 0, tool: 0, streaming: 0, planned: 1, done: 2, idle: 3 }; + const sorted = Object.entries(_nodeState) + .filter(([name]) => name !== 'runtime' && name !== 'frame_engine') + .sort((a, b) => (statusOrder[a[1].status] || 3) - (statusOrder[b[1].status] || 3)); + + let html = ''; + for (const [name, n] of sorted) { + const statusClass = n.status === 'thinking' || n.status === 'tool' ? 'nm-active' + : n.status === 'streaming' ? 'nm-streaming' : ''; + const shortName = name.replace('_v1', '').replace('_v2', '').replace('expert_', ''); + const modelShort = n.model ? n.model.split('/').pop().replace('-001', '').replace('-4.5', '4.5') : ''; + const tokenStr = n.maxTokens ? `${n.tokens}/${n.maxTokens}t` : ''; + const fillW = n.fillPct || 0; + const detail = n.lastDetail ? truncate(n.lastDetail, 45) : ''; + const toolStr = n.toolCalls > 0 ? ` [${n.toolCalls} calls]` : ''; + + html += `
+
+ ${esc(shortName)} + ${esc(modelShort)} + ${esc(tokenStr)} +
+
+
+ ${esc(n.lastEvent)} + ${esc(detail)}${esc(toolStr)} +
+
`; + } + el.innerHTML = html; +} + +export function clearNodes() { + for (const key of Object.keys(_nodeState)) delete _nodeState[key]; + const el = document.getElementById('node-metrics'); + if (el) el.innerHTML = ''; +} + +// Keep old meter function for backward compat (called from ws.js) +export function updateMeter(node, tokens, maxTokens, fillPct) { + const n = _getNode(node); + n.tokens = tokens; + n.maxTokens = maxTokens; + n.fillPct = fillPct; + renderNodes(); +} + +// --- Awareness: memorizer state --- + export function updateAwarenessState(state) { const body = document.getElementById('aw-state-body'); if (!body) return; @@ -33,6 +157,8 @@ export function updateAwarenessState(state) { body.innerHTML = html; } +// --- Awareness: sensor readings --- + export function updateAwarenessSensors(tick, deltas) { const body = document.getElementById('aw-sensor-body'); if (!body) return; @@ -46,12 +172,3 @@ export function updateAwarenessSensors(tick, deltas) { } body.innerHTML = html; } - -export function updateMeter(node, tokens, maxTokens, fillPct) { - const meter = document.getElementById('meter-' + node); - if (!meter) return; - const bar = meter.querySelector('.nm-bar'); - const text = meter.querySelector('.nm-text'); - if (bar) bar.style.width = fillPct + '%'; - if (text) text.textContent = `${tokens}/${maxTokens}t`; -} diff --git a/static/js/main.js b/static/js/main.js index 29899b0..e6d57b1 100644 --- a/static/js/main.js +++ b/static/js/main.js @@ -4,6 +4,7 @@ import { initAuth, authToken, startLogin } from './auth.js'; import { initTrace, addTrace, clearTrace } from './trace.js'; import { initChat, clearChat } from './chat.js'; import { clearDashboard } from './dashboard.js'; +import { clearNodes } from './awareness.js'; import { initGraph } from './graph.js'; import { connect } from './ws.js'; @@ -12,10 +13,13 @@ window.addEventListener('load', async () => { initTrace(); initChat(); await initGraph(); - await initAuth(() => connect()); + await initAuth(() => { + connect(); + loadGraphSwitcher(); + }); }); -// Clear session button +// Clear session window.clearSession = async () => { try { const headers = { 'Content-Type': 'application/json' }; @@ -24,11 +28,63 @@ window.clearSession = async () => { clearChat(); clearTrace(); clearDashboard(); + clearNodes(); addTrace('runtime', 'cleared', 'session reset'); } catch (e) { addTrace('runtime', 'error', 'clear failed: ' + e); } }; -// Login button +// Graph switcher — loads available graphs and shows buttons in top bar +async function loadGraphSwitcher() { + const container = document.getElementById('graph-switcher'); + if (!container) { console.error('[main] no #graph-switcher'); return; } + try { + const headers = {}; + if (authToken) headers['Authorization'] = 'Bearer ' + authToken; + const r = await fetch('/api/graph/list', { headers }); + if (!r.ok) { console.error('[main] graph/list failed:', r.status); return; } + const data = await r.json(); + const graphs = data.graphs || data || []; + console.log('[main] graphs:', graphs.length); + + // Get current active graph + let activeGraph = ''; + try { + const ar = await fetch('/api/graph/active', { headers }); + if (ar.ok) { + const ag = await ar.json(); + activeGraph = ag.name || ''; + } + } catch (e) {} + + container.innerHTML = graphs.map(g => { + const active = g.name === activeGraph; + return ``; + }).join(''); + } catch (e) {} +} + +window.switchGraph = async (name) => { + try { + const headers = { 'Content-Type': 'application/json' }; + if (authToken) headers['Authorization'] = 'Bearer ' + authToken; + await fetch('/api/graph/switch', { + method: 'POST', headers, + body: JSON.stringify({ name }), + }); + addTrace('runtime', 'graph_switch', name); + clearChat(); + clearTrace(); + clearDashboard(); + clearNodes(); + addTrace('runtime', 'switched', `graph: ${name}`); + await initGraph(); + loadGraphSwitcher(); + } catch (e) { + addTrace('runtime', 'error', 'switch failed: ' + e); + } +}; + +// Login window.startLogin = startLogin; diff --git a/static/js/ws.js b/static/js/ws.js index 25a8950..491cbe0 100644 --- a/static/js/ws.js +++ b/static/js/ws.js @@ -5,7 +5,7 @@ import { addTrace } from './trace.js'; import { handleDelta, handleDone, setWs as setChatWs } from './chat.js'; import { dockControls, setWs as setDashWs } from './dashboard.js'; import { graphAnimate } from './graph.js'; -import { updateMeter, updateAwarenessState, updateAwarenessSensors } from './awareness.js'; +import { updateMeter, updateNodeFromHud, updateAwarenessState, updateAwarenessSensors } from './awareness.js'; import { updateTestStatus } from './tests.js'; import { truncate, esc } from './util.js'; @@ -35,7 +35,8 @@ export function connect() { ws.onerror = () => {}; ws.onclose = (e) => { - if (e.code === 4001 || e.code === 1006) { + // 4001 = explicit auth rejection from server + if (e.code === 4001) { setAuthFailed(true); localStorage.removeItem('cog_token'); localStorage.removeItem('cog_access_token'); @@ -44,9 +45,10 @@ export function connect() { showLogin(); return; } - document.getElementById('status').textContent = 'disconnected'; - document.getElementById('status').style.color = '#666'; - addTrace('runtime', 'disconnected', 'ws closed'); + // 1006 = abnormal close (deploy, network), just reconnect + document.getElementById('status').textContent = 'reconnecting...'; + document.getElementById('status').style.color = '#f59e0b'; + addTrace('runtime', 'disconnected', `code ${e.code}, reconnecting...`); setTimeout(connect, 2000); }; @@ -123,6 +125,7 @@ function handleHud(data) { const event = data.event || ''; graphAnimate(event, node); + updateNodeFromHud(node, event, data); if (event === 'context') { const count = (data.messages || []).length; diff --git a/static/style.css b/static/style.css index d5f6963..22bdcd6 100644 --- a/static/style.css +++ b/static/style.css @@ -10,10 +10,16 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0; #test-status .ts-pass { color: #22c55e; } #test-status .ts-fail { color: #ef4444; } @keyframes pulse-text { 0%,100% { opacity: 1; } 50% { opacity: 0.5; } } +.btn-top { padding: 0.2rem 0.6rem; font-size: 0.7rem; background: #333; } +.btn-top:hover { background: #ef4444; } +#graph-switcher { display: flex; gap: 3px; } +.btn-graph { padding: 0.2rem 0.5rem; font-size: 0.65rem; font-family: monospace; background: #1a1a1a; color: #888; border: 1px solid #333; border-radius: 3px; cursor: pointer; } +.btn-graph:hover { color: #fff; border-color: #2563eb; } +.btn-graph.active { color: #22c55e; border-color: #22c55e; background: #0a1e14; } /* === Two-row layout === */ /* Middle row: workspace | node detail | graph */ -#middle-row { display: grid; grid-template-columns: 1fr 200px 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; } +#middle-row { display: grid; grid-template-columns: 1fr 300px 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; } /* Bottom row: chat | awareness | trace */ #bottom-row { display: grid; grid-template-columns: 1fr 1fr 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; } @@ -36,12 +42,19 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0; /* Node detail / metrics */ .detail-panel { display: flex; flex-direction: column; } -#node-metrics { flex: 1; overflow-y: auto; padding: 0.3rem; display: flex; flex-direction: column; gap: 1px; } -.node-meter { display: flex; align-items: center; gap: 0.3rem; padding: 0.2rem 0.4rem; background: #111; border-radius: 2px; } -.nm-label { font-size: 0.6rem; font-weight: 700; text-transform: uppercase; letter-spacing: 0.03em; min-width: 3.5rem; color: #888; } -.nm-bar { flex: 1; height: 5px; background: #1a1a1a; border-radius: 3px; overflow: hidden; } -.nm-fill { height: 100%; width: 0%; border-radius: 3px; transition: width 0.3s; background: #333; } -.nm-text { font-size: 0.55rem; color: #555; min-width: 3rem; text-align: right; font-family: monospace; } +#node-metrics { flex: 1; overflow-y: auto; padding: 0.3rem; display: flex; flex-direction: column; gap: 2px; } +.node-card { background: #111; border-radius: 3px; padding: 0.25rem 0.4rem; border-left: 2px solid #333; } +.node-card.nm-active { border-left-color: #f59e0b; background: #1a1408; } +.node-card.nm-streaming { border-left-color: #22c55e; background: #0a1e14; } +.nc-header { display: flex; align-items: center; gap: 0.3rem; } +.nc-name { font-size: 0.65rem; font-weight: 700; text-transform: uppercase; color: #e0e0e0; min-width: 3rem; } +.nc-model { font-size: 0.55rem; color: #666; font-family: monospace; } +.nc-tokens { font-size: 0.55rem; color: #555; font-family: monospace; margin-left: auto; } +.nc-bar { height: 3px; background: #1a1a1a; border-radius: 2px; overflow: hidden; margin: 2px 0; } +.nc-fill { height: 100%; border-radius: 2px; background: #333; transition: width 0.3s; } +.nc-status { display: flex; gap: 0.3rem; align-items: baseline; } +.nc-event { font-size: 0.55rem; color: #888; font-family: monospace; } +.nc-detail { font-size: 0.55rem; color: #666; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; } /* Graph panel */ .graph-panel { display: flex; flex-direction: column; } diff --git a/testcases/domain_context.md b/testcases/domain_context.md new file mode 100644 index 0000000..dcf0ca0 --- /dev/null +++ b/testcases/domain_context.md @@ -0,0 +1,40 @@ +# Domain Context + +Tests that the expert understands the Eras business domain: +Heizkostenabrechnung, Kunde→Objekt→Nutzeinheit→Geraet hierarchy, +and can formulate correct JOINs without guessing column names. + +## Setup +- clear history + +## Steps + +### 1. Expert knows the hierarchy +- send: wie viele Objekte haben Kunden im Durchschnitt? +- expect_trace: has tool_call +- expect_response: not contains "Error" or "error" or "Unknown column" +- expect_response: length > 20 + +### 2. Expert can JOIN kunden and objekte +- send: zeig mir die Top 5 Kunden mit den meisten Objekten +- expect_trace: has tool_call +- expect_response: not contains "Error" or "error" or "Unknown column" +- expect_response: length > 20 + +### 3. Expert understands Nutzeinheiten belong to Objekte +- send: how many Nutzeinheiten does the system have total? +- expect_trace: has tool_call +- expect_response: not contains "Error" or "error" or "Unknown column" +- expect_response: length > 10 + +### 4. Expert understands Geraete belong to Nutzeinheiten +- send: which Objekt has the most Geraete? +- expect_trace: has tool_call +- expect_response: not contains "Error" or "error" or "Unknown column" +- expect_response: length > 20 + +### 5. PA formulates good job descriptions +- send: gib mir eine Uebersicht ueber Kunde 2 +- expect_trace: has routed +- expect_response: length > 20 +- expect_response: not contains "clarify" or "specify" or "what kind"