Compare commits

..

No commits in common. "925fff731f1b6b618c7573551f9526b933231903" and "3a9c2795cfa7556203c49dedaaeadca8a2271c06" have entirely different histories.

35 changed files with 342 additions and 5311 deletions

View File

@ -153,29 +153,12 @@ def register_routes(app):
msg = json.loads(data) msg = json.loads(data)
# Always use current runtime (may change after graph switch) # Always use current runtime (may change after graph switch)
rt = _active_runtime or runtime rt = _active_runtime or runtime
try:
if msg.get("type") == "action": if msg.get("type") == "action":
action = msg.get("action", "unknown") await rt.handle_action(msg.get("action", "unknown"), msg.get("data"))
data_payload = msg.get("data")
if hasattr(rt, 'use_frames') and rt.use_frames:
# Frame engine handles actions as ACTION: prefix messages
action_text = f"ACTION:{action}"
if data_payload:
action_text += f"|data:{json.dumps(data_payload)}"
await rt.handle_message(action_text)
else:
await rt.handle_action(action, data_payload)
elif msg.get("type") == "cancel_process": elif msg.get("type") == "cancel_process":
rt.process_manager.cancel(msg.get("pid", 0)) rt.process_manager.cancel(msg.get("pid", 0))
else: else:
await rt.handle_message(msg.get("text", ""), dashboard=msg.get("dashboard")) await rt.handle_message(msg.get("text", ""), dashboard=msg.get("dashboard"))
except Exception as e:
import traceback
log.error(f"[ws] handler error: {e}\n{traceback.format_exc()}")
try:
await ws.send_text(json.dumps({"type": "hud", "node": "runtime", "event": "error", "detail": str(e)[:200]}))
except Exception:
pass
except WebSocketDisconnect: except WebSocketDisconnect:
if _active_runtime: if _active_runtime:
_active_runtime.detach_ws() _active_runtime.detach_ws()
@ -364,7 +347,6 @@ def register_routes(app):
"language": "en", "language": "en",
"style_hint": "casual, technical", "style_hint": "casual, technical",
"facts": [], "facts": [],
"user_expectation": "conversational",
} }
_pipeline_result = {"status": "idle", "id": "", "stage": "cleared"} _pipeline_result = {"status": "idle", "id": "", "stage": "cleared"}
# Notify frontend via WS # Notify frontend via WS
@ -398,26 +380,11 @@ def register_routes(app):
from .engine import load_graph, get_graph_for_cytoscape from .engine import load_graph, get_graph_for_cytoscape
from .runtime import _active_graph_name from .runtime import _active_graph_name
graph = load_graph(_active_graph_name) graph = load_graph(_active_graph_name)
# Include model info from instantiated nodes if runtime exists
node_details = {}
if _active_runtime:
for role, impl_name in graph["nodes"].items():
# Find the node instance by role
node_inst = getattr(_active_runtime, 'frame_engine', None)
if node_inst and hasattr(node_inst, 'nodes'):
inst = node_inst.nodes.get(role)
if inst:
node_details[role] = {
"impl": impl_name,
"model": getattr(inst, 'model', None) or '',
"max_tokens": getattr(inst, 'max_context_tokens', 0),
}
return { return {
"name": graph["name"], "name": graph["name"],
"description": graph["description"], "description": graph["description"],
"nodes": graph["nodes"], "nodes": graph["nodes"],
"edges": graph["edges"], "edges": graph["edges"],
"node_details": node_details,
"cytoscape": get_graph_for_cytoscape(graph), "cytoscape": get_graph_for_cytoscape(graph),
} }

View File

@ -302,59 +302,9 @@ class FrameEngine:
expert.send_hud = original_hud expert.send_hud = original_hud
thought_summary = (f"response[{len(thought.response)}] tool={thought.tool_used or 'none'} " thought_summary = (f"response[{len(thought.response)}] tool={thought.tool_used or 'none'} "
f"actions={len(thought.actions)} errors={len(thought.errors)}") f"actions={len(thought.actions)}")
has_tool = bool(thought.tool_used and thought.tool_output) has_tool = bool(thought.tool_used and thought.tool_output)
# PA retry: if expert failed OR skipped tools when data was needed
expectation = self.memorizer.state.get("user_expectation", "conversational")
# Detect hallucination: expert returned no tool output for a data job
job_needs_data = any(k in (routing.job or "").lower()
for k in ["query", "select", "tabelle", "table", "daten", "data",
"cost", "kosten", "count", "anzahl", "average", "schnitt",
"find", "finde", "show", "zeig", "list", "beschreib"])
expert_skipped_tools = not has_tool and not thought.errors and job_needs_data
if (thought.errors or expert_skipped_tools) and not has_tool and expectation in ("delegated", "waiting_input", "conversational"):
retry_reason = f"{len(thought.errors)} errors" if thought.errors else "no tool calls for data job"
self._end_frame(rec, output_summary=thought_summary,
route="pa_retry", condition=f"expert_failed ({retry_reason}), expectation={expectation}")
await self._send_hud({"node": "runtime", "event": "pa_retry",
"detail": f"expert failed: {retry_reason}, retrying via PA"})
# Stream retry notice to user
retry_msg = "Anderer Ansatz..." if routing.language == "de" else "Trying a different approach..."
await self.sink.send_delta(retry_msg + "\n")
# PA reformulates with error context
retry_errors = thought.errors if thought.errors else [
{"query": "(none)", "error": "Expert produced no database queries. The job requires data lookup but the expert answered without querying. Reformulate with explicit query instructions."}
]
error_summary = "; ".join(e.get("error", "")[:80] for e in retry_errors[-2:])
rec = self._begin_frame(self.frame + 1, "pa_retry",
input_summary=f"errors: {error_summary[:100]}")
routing2 = await self.nodes["pa"].route_retry(
command, self.history, memory_context=mem_ctx,
identity=self.identity, channel=self.channel,
original_job=routing.job, errors=retry_errors)
self._end_frame(rec, output_summary=f"retry_job: {(routing2.job or '')[:60]}",
route=f"expert_{routing2.expert}" if routing2.expert != "none" else "output")
if routing2.expert != "none":
expert2 = self._experts.get(routing2.expert, expert)
rec = self._begin_frame(self.frame + 1, f"expert_{routing2.expert}_retry",
input_summary=f"retry job: {(routing2.job or '')[:80]}")
original_hud2 = expert2.send_hud
expert2.send_hud = self._make_progress_wrapper(original_hud2, routing2.language)
try:
thought = await expert2.execute(routing2.job, routing2.language)
finally:
expert2.send_hud = original_hud2
thought_summary = (f"response[{len(thought.response)}] tool={thought.tool_used or 'none'} "
f"errors={len(thought.errors)}")
has_tool = bool(thought.tool_used and thought.tool_output)
self._end_frame(rec, output_summary=thought_summary,
route="interpreter" if has_tool else "output+ui")
routing = routing2 # use retry routing for rest of pipeline
# Interpreter (conditional) # Interpreter (conditional)
if self.has_interpreter and has_tool: if self.has_interpreter and has_tool:
self._end_frame(rec, output_summary=thought_summary, self._end_frame(rec, output_summary=thought_summary,
@ -573,7 +523,7 @@ class FrameEngine:
return self._make_result(result) return self._make_result(result)
# Complex action — needs full pipeline # Complex action — needs full pipeline
self._end_frame(rec, output_summary="no local handler", route="pa/director/thinker") self._end_frame(rec, output_summary="no local handler", route="director/thinker")
action_desc = f"ACTION: {action}" action_desc = f"ACTION: {action}"
if data: if data:
@ -585,9 +535,7 @@ class FrameEngine:
analysis=InputAnalysis(intent="action", topic=action, complexity="simple"), analysis=InputAnalysis(intent="action", topic=action, complexity="simple"),
source_text=action_desc) source_text=action_desc)
if self.has_pa: if self.has_director:
return await self._run_expert_pipeline(command, mem_ctx, dashboard)
elif self.has_director:
return await self._run_director_pipeline(command, mem_ctx, dashboard) return await self._run_director_pipeline(command, mem_ctx, dashboard)
else: else:
return await self._run_thinker_pipeline(command, mem_ctx, dashboard) return await self._run_thinker_pipeline(command, mem_ctx, dashboard)
@ -657,10 +605,6 @@ class FrameEngine:
response, controls = await asyncio.gather(output_task, ui_task) response, controls = await asyncio.gather(output_task, ui_task)
if controls: if controls:
await self.sink.send_controls(controls) await self.sink.send_controls(controls)
# Send artifacts (new system) alongside controls
artifacts = self.ui_node.get_artifacts()
if artifacts:
await self.sink.send_artifacts(artifacts)
return response return response
def _check_condition(self, name: str, command: Command = None, def _check_condition(self, name: str, command: Command = None,
@ -678,7 +622,6 @@ class FrameEngine:
return { return {
"response": response, "response": response,
"controls": self.ui_node.current_controls, "controls": self.ui_node.current_controls,
"artifacts": self.ui_node.get_artifacts(),
"memorizer": self.memorizer.state, "memorizer": self.memorizer.state,
"frames": self.frame, "frames": self.frame,
"trace": self.last_trace.to_dict(), "trace": self.last_trace.to_dict(),

View File

@ -18,9 +18,6 @@ class Node:
self.context_fill_pct = 0 self.context_fill_pct = 0
async def hud(self, event: str, **data): async def hud(self, event: str, **data):
# Always include model on context events so frontend knows what model each node uses
if event == "context" and self.model:
data["model"] = self.model
await self.send_hud({"node": self.name, "event": event, **data}) await self.send_hud({"node": self.name, "event": event, **data})
def trim_context(self, messages: list[dict]) -> list[dict]: def trim_context(self, messages: list[dict]) -> list[dict]:

View File

@ -1,8 +1,4 @@
"""Eras Expert: Heizkostenabrechnung domain specialist. """Eras Expert: heating/energy customer database specialist."""
The expert knows the full database schema. No DESCRIBE at runtime.
All queries use verified column names and JOIN patterns.
"""
import asyncio import asyncio
import logging import logging
@ -17,209 +13,55 @@ class ErasExpertNode(ExpertNode):
name = "eras_expert" name = "eras_expert"
default_database = "eras2_production" default_database = "eras2_production"
DOMAIN_SYSTEM = """You are the Eras domain expert for Heizkostenabrechnung (German heating cost billing). DOMAIN_SYSTEM = """You are the Eras expert — specialist for heating and energy customer data.
You work with the eras2_production database containing customer, device, and billing data.
All table and column names are German (lowercase). Common queries involve customer lookups,
device counts, consumption analysis, and billing reports."""
BUSINESS CONTEXT: SCHEMA = """Known tables (eras2_production):
Eras is software for Hausverwaltungen and Messdienste who manage properties, meters, and billings. - kunden customers
The USER of this agent is an Eras customer exploring their data. They think in domain terms - objekte properties/objects linked to customers
(Kunden, Objekte, Wohnungen, Zaehler) NOT in SQL. Never expose SQL or table names to the user. - nutzeinheit usage units within objects
- geraete devices/meters
- geraeteverbraeuche device consumption readings
- abrechnungen billing records
DOMAIN MODEL: CRITICAL: You do NOT know the exact column names. They are German and unpredictable.
- Kunden = property managers (Hausverwaltungen). 693 in the system. Your FIRST tool_sequence step for ANY SELECT query MUST be DESCRIBE on the target table.
- Objekte = buildings/Liegenschaften managed by Kunden. 780 total. Linked via objektkunde (m:n). Then use the actual column names from the DESCRIBE result in your SELECT.
- Nutzeinheiten = apartments/units inside Objekte. 4578 total.
- Nutzer = tenants/occupants of Nutzeinheiten. 8206 total.
- Geraete = measurement devices (Heizkostenverteiler, Zaehler). 56726 total.
- Verbraeuche = consumption readings from Geraete. 1.3M readings.
- Adressen = postal addresses, linked via objektadressen/kundenadressen.
RESPOND IN DOMAIN LANGUAGE: Example tool_sequence for "show me 5 customers":
- Say "Kunde Jaeger hat 3 Objekte" not "SELECT COUNT..." [
- Say "12 Wohnungen mit 45 Geraeten" not "nutzeinheit rows" {{"tool": "query_db", "args": {{"query": "DESCRIBE kunden", "database": "eras2_production"}}}},
- Present data as summaries, not raw tables""" {{"tool": "query_db", "args": {{"query": "SELECT * FROM kunden LIMIT 5", "database": "eras2_production"}}}}
]"""
SCHEMA = """COMPLETE DATABASE SCHEMA (eras2_production) — use these exact column names: def __init__(self, send_hud, process_manager=None):
super().__init__(send_hud, process_manager)
self._schema_cache: dict[str, str] = {} # table_name -> DESCRIBE result
=== kunden (693 rows) === async def execute(self, job: str, language: str = "de"):
PK: ID (int) """Execute with schema auto-discovery. Caches DESCRIBE results."""
Name1, Name2, Name3 (longtext) customer name parts # Inject cached schema into the job context
Kundennummer (longtext) customer number if self._schema_cache:
AnredeID (FK), BriefanredeID (FK), ZugeordneterKomplettdruckID (FK) schema_ctx = "Known column names from previous DESCRIBE:\n"
Anmerkung, Fremdnummer, Ansprechpartner (longtext) for table, desc in self._schema_cache.items():
Steuernummer, UmsatzsteuerID (longtext) # Just first 5 lines to keep it compact
HatHistorie, IstWebkunde, IstNettoKunde, BrennstoffkostenNachFIFO, BelegePerEmail (bool) lines = desc.strip().split("\n")[:6]
MietpreisAnpassungProzent (decimal) schema_ctx += f"\n{table}:\n" + "\n".join(lines) + "\n"
job = job + "\n\n" + schema_ctx
=== objektkunde (911 rows) JUNCTION: kunden objekte (many-to-many) === result = await super().execute(job, language)
PK: ID (int)
KundeID (FK kunden.ID)
ObjektID (FK objekte.ID)
ZeitraumVon, ZeitraumBis (datetime)
IstKunde, IstEigentuemer, IstRechnungsempfaenger, IstAbrechnungsempfaenger (bool)
=== objekte (780 rows) === # Cache any DESCRIBE results from this execution
PK: ID (int) # Parse from tool_output if it looks like a DESCRIBE result
Objektnummer (longtext) building reference number if result.tool_output and "Field\t" in result.tool_output:
AbleserID, MonteurID, UVIRefObjektID, ZugeordneterKomplettdruckID (FK) # Try to identify which table was described
Anmerkung, AnmerkungIntern (longtext) for table in ["kunden", "objekte", "nutzeinheit", "geraete",
HatHistorie, VorauszahlungGetrennt, Selbstablesung, IstObjektFreigegeben (bool) "geraeteverbraeuche", "abrechnungen"]:
if table in job.lower() or table in result.tool_output.lower():
self._schema_cache[table] = result.tool_output
log.info(f"[eras] cached schema for {table}")
break
=== objektadressen JUNCTION: objekte adressen === return result
PK: ID, ObjektID (FK objekte.ID), AdresseID (FK adressen.ID), IstPrimaer (bool)
=== kundenadressen JUNCTION: kunden adressen ===
PK: ID, KundeID (FK kunden.ID), AdresseID (FK adressen.ID), TypDerAdresseID (FK)
=== adressen (1762 rows) ===
PK: ID (int)
Strasse, Hausnummer, Postleitzahl, Ort, Adresszusatz, Postfach (longtext)
LandID (FK), Laengengrad, Breitengrad (double)
=== nutzeinheit (4578 rows) ===
PK: ID (int)
ObjektID (FK objekte.ID)
NeNummerInt (longtext) unit number
Lage, Stockwerk, Flaeche, Nutzflaeche (various)
AdresseID (FK), CustomStatusKeyID (FK)
=== kundenutzeinheit JUNCTION: kunden nutzeinheit ===
PK: ID, KundeID (FK kunden.ID), NutzeinheitID (FK nutzeinheit.ID), Von, Bis (datetime)
=== nutzer (8206 rows) tenants/occupants ===
PK: ID (int)
NutzeinheitID (FK nutzeinheit.ID)
Name1, Name2, Name3, Name4 (longtext) tenant name
NutzungVon, NutzungBis (datetime)
ArtDerNutzung (int), AnredeID (FK), BriefanredeID (FK)
IstGesperrt, Selbstableser (bool)
=== geraete (56726 rows) meters/devices ===
PK: ID (int)
NutzeinheitID (FK nutzeinheit.ID)
Geraetenummer (longtext) device number/serial
Bezeichnung (longtext) device name/label
Beschreibung (longtext) description
ArtikelID (FK), NutzergruppenID (FK), Einheit (int)
Einbaudatum, Ausbaudatum, GeeichtBis, GeeichtAm, ErstInbetriebnahme, DefektAb (datetime)
FirmwareVersion, LaufendeNummer, GruppenKennung, Memo, AllgemeinesMemo (longtext)
AnsprechpartnerID, ZugeordneterRaumID, CustomStatusKeyID (FK)
Gemietet, Gewartet, KeinAndruck, IstAbzuziehendesGeraet, HatHistorie (bool)
=== geraeteverbraeuche (1.3M rows) consumption readings ===
PK: ID (int)
GeraetID (FK geraete.ID)
Ablesedatum (datetime) reading date
Ablesung (double) meter reading value
Verbrauch (double) consumption value
Faktor (double) factor
Aenderungsdatum (datetime)
AbleseartID (FK), Schaetzung (int), Status (int)
IstRekonstruiert (bool), Herkunft (int)
ManuellerWert (double), Rohablesung (double)
Anmerkung, Fehler, Ampullenfarbe (longtext)
=== auftraege (2960 rows) billing work orders ===
PK: ID (int)
AuftragNummer, Bezeichnung (longtext)
ErstellDatum, Abgeschlossen (datetime)
ZugeordneteAbrechnungsinformationID (FK abrechnungsinformationen.ID)
ErstellMitarbeiterID (FK), AuftragsTyp (int), Status (int)
Anmerkung, ObererText, UntererText (longtext)
=== auftragspositionen (5094 rows) line items per work order ===
PK: ID (int)
AuftragID (FK auftraege.ID)
ArtikelID (FK artikel.ID)
SollMenge, IstMenge (int)
ZugeordneterGeraeteArtikelID (FK), ZugeordneteVertragPositionID (FK)
=== artikelposition (70164 rows) billing line items with prices ===
PK: ID (int)
ZugewiesenerArtikelID (FK artikel.ID)
ZugewieseneAbrechnungID (FK abrechnungsinformationen.ID)
RechnungID (FK rechnung.ID)
MengeVorgabe, Menge (decimal), NettoVorgabe, Netto (decimal), MWST (decimal)
Rechnungsart (int), VorschussBerechnung (bool), ARechnung (bool)
VerstecktInNebenkostenID (FK), ZugeordneteVertragPositionID (FK)
=== artikel (1078 rows) service/product catalog ===
PK: ID (int)
Artikelnummer, Bezeichnung (longtext)
Netto (decimal), MWST (decimal)
BerechnungsZiel (int), UmlageIn (int)
ZugeordnetePreislisteID (FK)
IstStandard, ARechnung, AppZusatz, IstEigenKostenpos (bool)
=== rechnung (7356 rows) invoices ===
PK: ID (int)
Rechnungsnummer (longtext), Rechnungsart (int)
BezahltAm (datetime), BezahlterBetrag (decimal)
Druckdatum, Erstelldatum, Exportdatum (datetime)
AbrechnungsinformationID (FK abrechnungsinformationen.ID)
AbschlagSummeSonder, AbschlagSummeStandard (decimal)
Bankeinzug (bool)
=== abrechnungsinformationen (4261 rows) billing periods/settings ===
PK: ID (int)
Von, Bis (datetime) billing period
AbrechnungHeizung, AbrechnungWarmwasser, AbrechnungNebenkosten, AbrechnungKaltwasser (bool)
Tarifabrechnung, BHKW, HeizsaldoInNebenkosten, AbrechnungLegionellen, AbrechnungRauchmelder (bool)
=== nebenkosten (42209 rows) ancillary cost items ===
PK: ID (int)
Von, Bis (datetime)
Bezeichnung (longtext), Mwst (decimal), Brutto (decimal)
EinheitDerKostenart (longtext), Umlage (int), UmlageZiel (int)
ZugeordnetesObjektID (FK objekte.ID)
NurEigentuemer, NurNutzer (bool)
=== vorauszahlungen (83932 rows) advance payments per tenant ===
PK: ID (int)
ZugeordneterNutzerID (FK nutzer.ID)
BetragNebenkosten, BetragHeizkosten, BetragWarmwasser (decimal)
Von, Bis (datetime), IstNetto (bool)
=== heizbetriebskosten (22557 rows) heating operation costs ===
PK: ID (int)
Von, Bis (datetime), Bezeichnung (longtext)
Mwst (decimal), Brutto (decimal), Art (int)
ZugeordnetesObjektID (FK objekte.ID)
ZugeordneteVerbrauchsgruppeID (FK)
=== brennstofflieferungen (6477 rows) fuel deliveries ===
PK: ID (int)
GeliefertAm (datetime), Menge (decimal), Betrag (decimal)
Mwst (decimal), Heizwert (decimal)
Anfangsstand, Endstand (decimal)
ZugeordneterEnergieVerwerterID (FK), BrennstoffMediumID (FK)
ZugeordneteAbrechnungsinformationID (FK abrechnungsinformationen.ID)
=== vertragpositionen (4395 rows) contract line items ===
PK: ID (int)
LaufzeitVon, LaufzeitBis (datetime)
Menge (decimal), Gesamtpreis (decimal), PreisProEinheit (decimal), Mwst (decimal)
ArtikelID (FK artikel.ID), VertragNummer (longtext)
Art (int), Umlage (int)
JOIN PATTERNS (use exactly):
Kunde Objekte: JOIN objektkunde ok ON ok.KundeID = k.ID JOIN objekte o ON o.ID = ok.ObjektID
Objekt Adresse: JOIN objektadressen oa ON oa.ObjektID = o.ID JOIN adressen a ON a.ID = oa.AdresseID
Kunde Adresse: JOIN kundenadressen ka ON ka.KundeID = k.ID JOIN adressen a ON a.ID = ka.AdresseID
Objekt NE: JOIN nutzeinheit ne ON ne.ObjektID = o.ID
NE Nutzer: JOIN nutzer nu ON nu.NutzeinheitID = ne.ID
NE Geraete: JOIN geraete g ON g.NutzeinheitID = ne.ID
Geraet Verbrauch: JOIN geraeteverbraeuche gv ON gv.GeraetID = g.ID
Auftrag Positionen: JOIN auftragspositionen ap ON ap.AuftragID = a.ID
Auftrag Abrechnung: JOIN abrechnungsinformationen ai ON ai.ID = a.ZugeordneteAbrechnungsinformationID
Artikelpos Artikel: JOIN artikel art ON art.ID = ap.ZugewiesenerArtikelID
Artikelpos Rechnung: JOIN rechnung r ON r.ID = ap.RechnungID
Artikelpos Abrechnung: JOIN abrechnungsinformationen ai ON ai.ID = ap.ZugewieseneAbrechnungID
Nebenkosten Objekt: JOIN objekte o ON o.ID = nk.ZugeordnetesObjektID
Vorauszahlung Nutzer: JOIN nutzer nu ON nu.ID = vz.ZugeordneterNutzerID
RULES:
- For tables listed above: use ONLY the listed column names. Never guess.
- For tables NOT listed above: use SELECT * with LIMIT to discover columns.
- If a query fails, the retry system will show you the error. Fix the column name and try again.
- Always LIMIT large queries (max 50 rows).
- Use LEFT JOIN when results might be empty."""

View File

@ -38,38 +38,28 @@ Given a job description, produce a JSON tool sequence to accomplish it.
Available tools: Available tools:
- query_db(query, database) SQL SELECT/DESCRIBE/SHOW only - query_db(query, database) SQL SELECT/DESCRIBE/SHOW only
- emit_actions(actions) show buttons [{label, action, payload?}] - emit_actions(actions) show buttons [{{label, action, payload?}}]
- set_state(key, value) persistent key-value - set_state(key, value) persistent key-value
- create_machine(id, initial, states) interactive UI navigation - emit_display(items) formatted data [{{type, label, value?, style?}}]
- add_state / reset_machine / destroy_machine machine lifecycle - create_machine(id, initial, states) interactive UI with navigation
- update_machine(id, data) update wizard data fields (e.g. {"bundesland": "Bayern"}) states: {{"state_name": {{"actions": [...], "display": [...]}}}}
- transition_machine(id, target) move machine to a specific state - add_state(id, state, buttons, content) add state to machine
- emit_artifact(type, data, actions?, meta?) emit a typed workspace artifact: - reset_machine(id) reset to initial
type="entity_detail": data={title, subtitle?, fields:[{label,value}]}, actions=[{label,action}] - destroy_machine(id) remove machine
type="data_table": data={title?, columns:[str], rows:[{col:val}]}
type="document_page": data={title, sections:[{heading,content}]}
type="action_bar": actions=[{label, action, payload?}]
type="status": data={label, value?, display_type:"progress"|"info"|"text"}
PREFERRED: Use emit_artifact for all display output. Legacy emit_card/emit_display still work but emit_artifact is cleaner.
Cards are also generated automatically in the response step from query results.
Output ONLY valid JSON: Output ONLY valid JSON:
{ {{
"tool_sequence": [ "tool_sequence": [
{"tool": "query_db", "args": {"query": "SELECT ...", "database": "{database}"}} {{"tool": "query_db", "args": {{"query": "SELECT ...", "database": "{database}"}}}},
{{"tool": "emit_actions", "args": {{"actions": [{{"label": "...", "action": "..."}}]}}}}
], ],
"response_hint": "How to phrase the result" "response_hint": "How to phrase the result for the user"
} }}
Rules: Rules:
- NEVER guess column names. Use ONLY columns from the schema. - NEVER guess column names. If unsure, DESCRIBE first.
- Max 5 tools. Keep it focused. - Max 5 tools. Keep it focused.
- For entity details: query all relevant fields, the response step creates the card. - The job is self-contained all context you need is in the job description."""
- For lists: query multiple rows, the table renders automatically.
- The job is self-contained.
- NEVER answer data questions without querying the database. You MUST include at least one query_db call for any job that asks about data, counts, costs, or entities. If you are unsure which tables to use, start with DESCRIBE or SELECT * FROM table LIMIT 3 to explore.
- An EMPTY tool_sequence is ONLY acceptable if the job explicitly asks for a UI-only action (buttons, machine, display) with no data lookup."""
RESPONSE_SYSTEM = """You are a domain expert summarizing results for the user. RESPONSE_SYSTEM = """You are a domain expert summarizing results for the user.
@ -78,73 +68,37 @@ Rules:
Job: {job} Job: {job}
{results} {results}
Output a JSON object with "text" (response to user) and optionally "card" (structured display): Write a concise, natural response. 1-3 sentences.
- Reference specific data from the results.
{ - Don't repeat raw output — summarize.
"text": "Concise natural response, 1-3 sentences. Reference data. Match language: {language}.", - Match the language: {language}."""
"card": {
"title": "Entity Name or ID",
"subtitle": "Type or category",
"fields": [{"label": "Field", "value": "actual value from results"}],
"actions": [{"label": "Next action", "action": "action_id"}]
}
}
Rules:
- "text" is REQUIRED. Keep it short.
- "card" is OPTIONAL. Include it for single-entity details (Kunde, Objekt, Auftrag).
- Card fields must use ACTUAL values from the query results, never templates/placeholders.
- For lists of multiple entities, use multiple fields or skip the card.
- If no card makes sense, just return {"text": "..."}.
- Output ONLY valid JSON."""
def __init__(self, send_hud, process_manager=None): def __init__(self, send_hud, process_manager=None):
super().__init__(send_hud) super().__init__(send_hud)
MAX_RETRIES = 3
async def execute(self, job: str, language: str = "de") -> ThoughtResult: async def execute(self, job: str, language: str = "de") -> ThoughtResult:
"""Execute a self-contained job with retry on SQL errors. """Execute a self-contained job. Returns ThoughtResult."""
Expert knows the schema plan, execute, retry if needed, respond."""
await self.hud("thinking", detail=f"planning: {job[:80]}") await self.hud("thinking", detail=f"planning: {job[:80]}")
errors_so_far = [] # Step 1: Plan tool sequence
tool_sequence = []
response_hint = ""
for attempt in range(1, self.MAX_RETRIES + 1):
# Plan (or re-plan with error context)
plan_prompt = f"Job: {job}"
if errors_so_far:
plan_prompt += "\n\nPREVIOUS ATTEMPTS FAILED:\n"
for err in errors_so_far:
plan_prompt += f"- Query: {err['query']}\n Error: {err['error']}\n"
if 'describe' in err:
plan_prompt += f" DESCRIBE result: {err['describe'][:300]}\n"
plan_prompt += "\nFix the query. If a column was unknown, use the DESCRIBE result above or try SELECT * LIMIT 3 to see actual columns."
plan_system = self.PLAN_SYSTEM
plan_system = plan_system.replace("{domain}", self.DOMAIN_SYSTEM)
plan_system = plan_system.replace("{schema}", self.SCHEMA)
plan_system = plan_system.replace("{database}", self.default_database)
plan_messages = [ plan_messages = [
{"role": "system", "content": plan_system}, {"role": "system", "content": self.PLAN_SYSTEM.format(
{"role": "user", "content": plan_prompt}, domain=self.DOMAIN_SYSTEM, schema=self.SCHEMA,
database=self.default_database)},
{"role": "user", "content": f"Job: {job}"},
] ]
plan_raw = await llm_call(self.model, plan_messages) plan_raw = await llm_call(self.model, plan_messages)
tool_sequence, response_hint = self._parse_plan(plan_raw) tool_sequence, response_hint = self._parse_plan(plan_raw)
await self.hud("planned", tools=len(tool_sequence),
hint=response_hint[:80], attempt=attempt)
# Execute tools await self.hud("planned", tools=len(tool_sequence), hint=response_hint[:80])
# Step 2: Execute tools
actions = [] actions = []
state_updates = {} state_updates = {}
display_items = [] display_items = []
machine_ops = [] machine_ops = []
artifacts = []
tool_used = "" tool_used = ""
tool_output = "" tool_output = ""
had_error = False
for step in tool_sequence: for step in tool_sequence:
tool = step.get("tool", "") tool = step.get("tool", "")
@ -153,14 +107,6 @@ Rules:
if tool == "emit_actions": if tool == "emit_actions":
actions.extend(args.get("actions", [])) actions.extend(args.get("actions", []))
elif tool == "emit_card":
card = args.get("card", args)
card["type"] = "card"
display_items.append(card)
elif tool == "emit_list":
lst = args.get("list", args)
lst["type"] = "list"
display_items.append(lst)
elif tool == "set_state": elif tool == "set_state":
key = args.get("key", "") key = args.get("key", "")
if key: if key:
@ -175,106 +121,32 @@ Rules:
machine_ops.append({"op": "reset", **args}) machine_ops.append({"op": "reset", **args})
elif tool == "destroy_machine": elif tool == "destroy_machine":
machine_ops.append({"op": "destroy", **args}) machine_ops.append({"op": "destroy", **args})
elif tool == "update_machine":
machine_ops.append({"op": "update_data", **args})
elif tool == "transition_machine":
machine_ops.append({"op": "transition", **args})
elif tool == "emit_artifact":
import uuid
artifact = {
"id": args.get("id", str(uuid.uuid4())[:8]),
"type": args.get("type", "status"),
"data": args.get("data", {}),
"actions": args.get("actions", []),
"meta": args.get("meta", {}),
}
artifacts.append(artifact)
elif tool == "query_db": elif tool == "query_db":
query = args.get("query", "") query = args.get("query", "")
database = args.get("database", self.default_database) database = args.get("database", self.default_database)
try: try:
result = await asyncio.to_thread(run_db_query, query, database) result = await asyncio.to_thread(run_db_query, query, database)
if result.startswith("Error:"):
err_entry = {"query": query, "error": result}
# Auto-DESCRIBE on column errors to help retry
if "Unknown column" in result or "1054" in result:
import re
# Extract table name from query
tables_in_query = re.findall(r'FROM\s+(\w+)|JOIN\s+(\w+)', query, re.IGNORECASE)
for match in tables_in_query:
tname = match[0] or match[1]
if tname:
try:
desc = await asyncio.to_thread(run_db_query, f"DESCRIBE {tname}", database)
err_entry["describe"] = f"{tname}: {desc[:300]}"
await self.hud("tool_result", tool="describe",
output=f"Auto-DESCRIBE {tname}")
except Exception:
pass
break
errors_so_far.append(err_entry)
had_error = True
await self.hud("tool_result", tool="query_db",
output=f"ERROR (attempt {attempt}): {result[:150]}")
break
tool_used = "query_db" tool_used = "query_db"
tool_output = result tool_output = result
await self.hud("tool_result", tool="query_db", output=result[:200]) await self.hud("tool_result", tool="query_db", output=result[:200])
except Exception as e: except Exception as e:
errors_so_far.append({"query": query, "error": str(e)}) tool_used = "query_db"
had_error = True tool_output = f"Error: {e}"
await self.hud("tool_result", tool="query_db", await self.hud("tool_result", tool="query_db", output=str(e)[:200])
output=f"ERROR (attempt {attempt}): {e}")
break
if not had_error: # Step 3: Generate response
break # success — stop retrying
log.info(f"[expert] attempt {attempt} failed, {len(errors_so_far)} errors")
# Generate response (with whatever we have — success or final error)
results_text = "" results_text = ""
if tool_output: if tool_output:
results_text = f"Tool result:\n{tool_output[:500]}" results_text = f"Tool result:\n{tool_output[:500]}"
elif errors_so_far:
results_text = f"All {len(errors_so_far)} query attempts failed:\n"
for err in errors_so_far[-2:]:
results_text += f" {err['error'][:100]}\n"
resp_system = self.RESPONSE_SYSTEM
resp_system = resp_system.replace("{domain}", self.DOMAIN_SYSTEM)
resp_system = resp_system.replace("{job}", job)
resp_system = resp_system.replace("{results}", results_text)
resp_system = resp_system.replace("{language}", language)
resp_messages = [ resp_messages = [
{"role": "system", "content": resp_system}, {"role": "system", "content": self.RESPONSE_SYSTEM.format(
domain=self.DOMAIN_SYSTEM, job=job, results=results_text, language=language)},
{"role": "user", "content": job}, {"role": "user", "content": job},
] ]
raw_response = await llm_call(self.model, resp_messages) response = await llm_call(self.model, resp_messages)
if not response:
# Parse JSON response with optional card response = "[no response]"
response = raw_response or "[no response]"
try:
text = raw_response.strip()
if text.startswith("```"):
text = text.split("\n", 1)[1] if "\n" in text else text[3:]
if text.endswith("```"):
text = text[:-3]
text = text.strip()
resp_data = json.loads(text)
response = resp_data.get("text", raw_response)
if resp_data.get("artifact"):
# New: artifact in response JSON
art = resp_data["artifact"]
import uuid
if "id" not in art:
art["id"] = str(uuid.uuid4())[:8]
artifacts.append(art)
elif resp_data.get("card"):
card = resp_data["card"]
card["type"] = "card"
display_items.append(card)
except (json.JSONDecodeError, Exception):
pass # Use raw response as text
await self.hud("done", response=response[:100]) await self.hud("done", response=response[:100])
@ -286,8 +158,6 @@ Rules:
state_updates=state_updates, state_updates=state_updates,
display_items=display_items, display_items=display_items,
machine_ops=machine_ops, machine_ops=machine_ops,
errors=errors_so_far,
artifacts=artifacts,
) )
def _parse_plan(self, raw: str) -> tuple[list, str]: def _parse_plan(self, raw: str) -> tuple[list, str]:

View File

@ -22,7 +22,7 @@ Listener: {identity} on {channel}
Return ONLY valid JSON. No markdown, no explanation. Return ONLY valid JSON. No markdown, no explanation.
Schema: Schema:
{ {{
"who": "name or unknown", "who": "name or unknown",
"language": "en | de | mixed", "language": "en | de | mixed",
"intent": "question | request | social | action | feedback", "intent": "question | request | social | action | feedback",
@ -30,7 +30,7 @@ Schema:
"tone": "casual | frustrated | playful | urgent", "tone": "casual | frustrated | playful | urgent",
"complexity": "trivial | simple | complex", "complexity": "trivial | simple | complex",
"context": "brief note or empty" "context": "brief note or empty"
} }}
Rules: Rules:
- Classify the CURRENT message only. Previous messages are context, not the target. - Classify the CURRENT message only. Previous messages are context, not the target.
@ -53,11 +53,11 @@ Rules:
casual = neutral casual = neutral
Examples: Examples:
"hi there!" -> {"language":"en","intent":"social","tone":"casual","complexity":"trivial"} "hi there!" -> {{"language":"en","intent":"social","tone":"casual","complexity":"trivial"}}
"Wie spaet ist es?" -> {"language":"de","intent":"question","tone":"casual","complexity":"simple"} "Wie spaet ist es?" -> {{"language":"de","intent":"question","tone":"casual","complexity":"simple"}}
"this is broken, nothing works" -> {"language":"en","intent":"feedback","tone":"frustrated","complexity":"simple"} "this is broken, nothing works" -> {{"language":"en","intent":"feedback","tone":"frustrated","complexity":"simple"}}
"create two buttons" -> {"language":"en","intent":"request","tone":"casual","complexity":"simple"} "create two buttons" -> {{"language":"en","intent":"request","tone":"casual","complexity":"simple"}}
"ok thanks bye" -> {"language":"en","intent":"social","tone":"casual","complexity":"trivial"} "ok thanks bye" -> {{"language":"en","intent":"social","tone":"casual","complexity":"trivial"}}
{memory_context}""" {memory_context}"""
@ -78,9 +78,8 @@ Examples:
history_summary = "Recent conversation:\n" + "\n".join(lines) history_summary = "Recent conversation:\n" + "\n".join(lines)
messages = [ messages = [
{"role": "system", "content": self.SYSTEM.replace( {"role": "system", "content": self.SYSTEM.format(
"{memory_context}", memory_context).replace( memory_context=memory_context, identity=identity, channel=channel)},
"{identity}", identity).replace("{channel}", channel)},
] ]
if history_summary: if history_summary:
messages.append({"role": "user", "content": history_summary}) messages.append({"role": "user", "content": history_summary})

View File

@ -26,19 +26,6 @@ Given the conversation so far, output a JSON object with these fields:
- language: string primary language being used (en, de, mixed) - language: string primary language being used (en, de, mixed)
- style_hint: string how Output should talk (casual, formal, technical, poetic, etc.) - style_hint: string how Output should talk (casual, formal, technical, poetic, etc.)
- facts: list of strings important facts learned about the user. NEVER drop facts from the existing list unless they are proven wrong. Always include all existing facts plus any new ones. - facts: list of strings important facts learned about the user. NEVER drop facts from the existing list unless they are proven wrong. Always include all existing facts plus any new ones.
- user_expectation: string what the user expects the agent to do next. One of:
"conversational" default. User is chatting, asking questions, browsing. Normal back-and-forth.
"delegated" user gave an imperative task ("build X", "do Y", "create Z"). They expect autonomous progress, not clarifying questions.
"waiting_input" agent asked a question or presented choices. User's next message is likely an answer.
"observing" user returned after being idle, or is reviewing a large output. Brief responses, wait for explicit engagement.
Cues:
- Imperative verbs + task scope ("build", "create", "do", "find") delegated
- Agent ended with "Moment..." / thinking message but user hasn't seen full results yet → delegated (task still in progress)
- Short follow-ups like "und?", "ja?", "weiter?", "and?", "so?", "result?", "ergebnis?" waiting_input (user is waiting for the agent to deliver)
- Agent ended with a question ("Sollen wir...?", "Gibt es...?") waiting_input
- User said "ok/thanks/bye/danke" after output observing
- Everything else conversational
IMPORTANT: If the agent just delivered partial results or said "Moment..." and the user sends a short nudge, that is ALWAYS waiting_input, never conversational.
Output ONLY valid JSON. No explanation, no markdown fences.""" Output ONLY valid JSON. No explanation, no markdown fences."""
@ -53,7 +40,6 @@ Output ONLY valid JSON. No explanation, no markdown fences."""
"language": "en", "language": "en",
"style_hint": "casual, technical", "style_hint": "casual, technical",
"facts": [], "facts": [],
"user_expectation": "conversational",
} }
def get_context_block(self, sensor_lines: list[str] = None, ui_state: dict = None) -> str: def get_context_block(self, sensor_lines: list[str] = None, ui_state: dict = None) -> str:

View File

@ -34,12 +34,6 @@ YOUR JOB: Transform the Thinker's reasoning into a natural, human-readable text
- Keep the user's language — if they wrote German, respond in German. - Keep the user's language — if they wrote German, respond in German.
- Be concise. Don't describe data that the UI node will show as a table. - Be concise. Don't describe data that the UI node will show as a table.
PHRASING by user_expectation (from memorizer):
- "delegated": progress-report style. State what was done and what's next. No questions unless blocked.
- "waiting_input": acknowledge the user's answer and continue the flow naturally.
- "observing": keep it brief. No unsolicited follow-up questions or suggestions.
- "conversational": natural, warm dialogue. Follow-ups are fine.
{memory_context}""" {memory_context}"""
async def process(self, thought: ThoughtResult, history: list[dict], async def process(self, thought: ThoughtResult, history: list[dict],
@ -48,7 +42,7 @@ PHRASING by user_expectation (from memorizer):
await self.hud("streaming") await self.hud("streaming")
messages = [ messages = [
{"role": "system", "content": self.SYSTEM.replace("{memory_context}", memory_context)}, {"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
] ]
for msg in history[-20:]: for msg in history[-20:]:
messages.append(msg) messages.append(msg)

View File

@ -27,8 +27,6 @@ Experts have these tools:
- query_db SQL queries on their domain database - query_db SQL queries on their domain database
- emit_actions create buttons on the dashboard - emit_actions create buttons on the dashboard
- create_machine / add_state / reset_machine / destroy_machine interactive UI components - create_machine / add_state / reset_machine / destroy_machine interactive UI components
- update_machine(id, data) update wizard data fields on existing machine
- transition_machine(id, target) move machine to a specific state
- set_state persistent key-value store - set_state persistent key-value store
- emit_display formatted data display - emit_display formatted data display
@ -38,13 +36,13 @@ YOUR JOB:
3. Only respond directly for social chat (greetings, thanks, bye, small talk) 3. Only respond directly for social chat (greetings, thanks, bye, small talk)
Output ONLY valid JSON: Output ONLY valid JSON:
{ {{
"expert": "{expert_names} | none", "expert": "{expert_names} | none",
"job": "Self-contained task. Include ALL context — the expert has NO conversation history. Describe what to query, what UI to build, what the user expects to see.", "job": "Self-contained task. Include ALL context — the expert has NO conversation history. Describe what to query, what UI to build, what the user expects to see.",
"thinking_message": "Short message for user while expert works, in their language", "thinking_message": "Short message for user while expert works, in their language",
"response_hint": "If expert=none, your direct response to the user.", "response_hint": "If expert=none, your direct response to the user.",
"language": "de | en | mixed" "language": "de | en | mixed"
} }}
Rules: Rules:
- expert=none ONLY for social chat (hi, thanks, bye, how are you) - expert=none ONLY for social chat (hi, thanks, bye, how are you)
@ -55,21 +53,11 @@ Rules:
- thinking_message: natural, in user's language. e.g. "Moment, ich schaue nach..." - thinking_message: natural, in user's language. e.g. "Moment, ich schaue nach..."
- If the user mentions data, tables, customers, devices, buttons, counters expert - If the user mentions data, tables, customers, devices, buttons, counters expert
- When unsure which expert: pick the one whose domain matches best - When unsure which expert: pick the one whose domain matches best
- MACHINE STATE: If there are active machines/wizards listed in the context below, ALWAYS include the machine's current state and stored data in the job. The expert needs this to continue the workflow. Example: "Machine 'angebot_wizard' is on step 'select_age', data: {bundesland: Bayern}. User asks: ..."
- If the user asks about their wizard/workflow progress and the info is already visible in the context, respond directly (expert=none) using the machine state from context. Only route to expert if the user needs data queried or tools called.
- For update_machine / transition_machine requests: route to expert with the machine ID and operation details in the job.
USER EXPECTATION (from memorizer):
- If user_expectation is "delegated": formulate comprehensive, autonomous jobs. Do NOT include clarifying questions in the job. Tell the expert to proceed and report results.
- If user_expectation is "waiting_input": the user is waiting for results or nudging ("und?", "ja?", "weiter?"). Look at conversation history to find what they were waiting for and re-formulate that job. If they answered a question you asked, extract their answer and fold it into context.
- If user_expectation is "observing": only route to expert if the user explicitly asks for something. Otherwise respond directly with brief acknowledgment.
- If user_expectation is "conversational": normal routing behavior.
- CONTINUATION: When user sends a very short message (1-3 words like "und?", "weiter", "ja") after partial/incomplete results, treat it as "continue the previous task". Include the original question and any partial results in the job.
{memory_context}""" {memory_context}"""
EXPERT_DESCRIPTIONS = { EXPERT_DESCRIPTIONS = {
"eras": "eras — Heizkostenabrechnung (German heating cost billing). Users are Hausverwaltungen managing Kunden, Objekte (buildings), Nutzeinheiten (apartments), Geraete (meters), Verbraeuche (readings), Abrechnungen (billings), Auftraege (work orders). Hierarchy: Kunde > Objekte > Nutzeinheiten > Geraete > Verbraeuche. Database: eras2_production. Can also build dashboard UI.", "eras": "eras — heating/energy domain. Database: eras2_production (customers, devices, billing, consumption). Can also build dashboard UI (buttons, machines, counters, tables) for energy data workflows.",
"plankiste": "plankiste — Kita planning domain. Database: plankiste_test (children, care schedules, offers, pricing). Can build dashboard UI for education workflows and generate Angebote.", "plankiste": "plankiste — Kita planning domain. Database: plankiste_test (children, care schedules, offers, pricing). Can build dashboard UI for education workflows and generate Angebote.",
} }
@ -101,15 +89,10 @@ USER EXPECTATION (from memorizer):
expert_lines.append("- (no experts available — handle everything directly)") expert_lines.append("- (no experts available — handle everything directly)")
expert_names = " | ".join(self._available_experts) if self._available_experts else "none" expert_names = " | ".join(self._available_experts) if self._available_experts else "none"
# Manual substitution to avoid .format() breaking on curly braces in memory_context
system_content = self.SYSTEM
system_content = system_content.replace("{memory_context}", memory_context)
system_content = system_content.replace("{identity}", identity)
system_content = system_content.replace("{channel}", channel)
system_content = system_content.replace("{experts}", "\n".join(expert_lines))
system_content = system_content.replace("{expert_names}", expert_names)
messages = [ messages = [
{"role": "system", "content": system_content}, {"role": "system", "content": self.SYSTEM.format(
memory_context=memory_context, identity=identity, channel=channel,
experts="\n".join(expert_lines), expert_names=expert_names)},
] ]
# Summarize recent history (PA sees full context) # Summarize recent history (PA sees full context)
@ -135,7 +118,7 @@ USER EXPECTATION (from memorizer):
log.info(f"[pa] raw: {raw[:300]}") log.info(f"[pa] raw: {raw[:300]}")
routing = self._parse_routing(raw, command) routing = self._parse_routing(raw, command)
await self.hud("routed", expert=routing.expert, job=(routing.job or "")[:100], await self.hud("routed", expert=routing.expert, job=routing.job[:100],
direct=routing.expert == "none") direct=routing.expert == "none")
# Update directive style based on tone # Update directive style based on tone
@ -148,72 +131,6 @@ USER EXPECTATION (from memorizer):
return routing return routing
async def route_retry(self, command: Command, history: list[dict],
                      memory_context: str = "", identity: str = "unknown",
                      channel: str = "unknown", original_job: str = "",
                      errors: list = None) -> PARouting:
    """Re-route after expert failure. PA reformulates with error context.

    Args:
        command: Current command; passed through to ``_parse_routing``.
        history: Conversation messages; only the last 8 are summarized.
        memory_context: Text substituted for ``{memory_context}`` in SYSTEM.
        identity: Text substituted for ``{identity}`` in SYSTEM.
        channel: Text substituted for ``{channel}`` in SYSTEM.
        original_job: The job text the expert failed on.
        errors: Error records (dicts read via ``query`` / ``error`` /
            ``describe`` keys); only the last 3 are included in the prompt.

    Returns:
        The routing parsed from the LLM reply by ``_parse_routing``.
    """
    await self.hud("thinking", detail="reformulating after expert failure")

    # Fields are coerced to str before truncation so a non-string value
    # (e.g. None or an exception object) cannot break the slice.
    error_lines = []
    for err in (errors or [])[-3:]:
        error_lines.append(f"- Query: {str(err.get('query', '?'))[:100]}")
        error_lines.append(f" Error: {str(err.get('error', '?'))[:100]}")
        describe = err.get("describe")
        if describe:
            error_lines.append(f" Schema: {str(describe)[:200]}")
    error_block = "\n".join(error_lines)

    retry_prompt = f"""The expert FAILED the previous job. You must reformulate.
ORIGINAL JOB: {original_job}
ERRORS:
{error_block}
REFORMULATE the job with a DIFFERENT approach:
- If the query was too complex (JOINs, window functions), break it into simpler steps
- If columns were wrong, use the DESCRIBE info above to fix them
- If the table structure is unclear, tell the expert to first explore with SELECT * LIMIT 5
- Think about what data the user actually needs and find a simpler path to it
Output the same JSON format as before. The job MUST be different from the original."""

    expert_lines = []
    for name in self._available_experts:
        desc = self.EXPERT_DESCRIPTIONS.get(name, f"{name} — domain expert")
        expert_lines.append(f"- {desc}")
    expert_names = " | ".join(self._available_experts) if self._available_experts else "none"

    # Plain substring replacement (not str.format) so literal braces in the
    # template or in the substituted values are left alone. Order matches
    # the original chain of replace() calls.
    system_content = self.SYSTEM
    for placeholder, value in (
        ("{memory_context}", memory_context),
        ("{identity}", identity),
        ("{channel}", channel),
        ("{experts}", "\n".join(expert_lines)),
        ("{expert_names}", expert_names),
    ):
        system_content = system_content.replace(placeholder, value)

    messages = [
        {"role": "system", "content": system_content},
    ]

    recent = history[-8:]
    if recent:
        lines = []
        for msg in recent:
            role = msg.get("role", "?")
            content = msg.get("content", "")[:200]
            lines.append(f" {role}: {content}")
        messages.append({"role": "user", "content": "Recent conversation:\n" + "\n".join(lines)})
        messages.append({"role": "assistant", "content": "OK, I have the context."})

    messages.append({"role": "user", "content": retry_prompt})
    messages = self.trim_context(messages)

    # NOTE(review): llm_call appears to be able to return a falsy value
    # (other callers guard with `or "[no response]"`); normalise to "" so the
    # log slice and the parser's strip() do not raise.
    raw = await llm_call(self.model, messages) or ""
    log.info(f"[pa] retry raw: {raw[:300]}")

    routing = self._parse_routing(raw, command)
    await self.hud("routed", expert=routing.expert, job=(routing.job or "")[:100],
                   direct=routing.expert == "none", retry=True)
    return routing
def _parse_routing(self, raw: str, command: Command) -> PARouting: def _parse_routing(self, raw: str, command: Command) -> PARouting:
"""Parse LLM JSON into PARouting with fallback.""" """Parse LLM JSON into PARouting with fallback."""
text = raw.strip() text = raw.strip()
@ -232,10 +149,10 @@ Output the same JSON format as before. The job MUST be different from the origin
expert = "none" expert = "none"
return PARouting( return PARouting(
expert=expert, expert=expert,
job=data.get("job") or "", job=data.get("job", ""),
thinking_message=data.get("thinking_message") or "", thinking_message=data.get("thinking_message", ""),
response_hint=data.get("response_hint") or "", response_hint=data.get("response_hint", ""),
language=data.get("language") or command.analysis.language, language=data.get("language", command.analysis.language),
) )
except (json.JSONDecodeError, Exception) as e: except (json.JSONDecodeError, Exception) as e:
log.error(f"[pa] parse failed: {e}, raw: {text[:200]}") log.error(f"[pa] parse failed: {e}, raw: {text[:200]}")

View File

@ -236,7 +236,7 @@ You are one node in a pipeline: Input (perceives) -> You (reason) -> Output (spe
1. emit_actions() show buttons. Button clicks come back as "ACTION: action_name". 1. emit_actions() show buttons. Button clicks come back as "ACTION: action_name".
Stateful buttons: include var/op in payload (inc/dec/set/toggle). UI handles locally. Stateful buttons: include var/op in payload (inc/dec/set/toggle). UI handles locally.
Example: label:"+1", action:"increment", payload:{"var":"count","op":"inc","initial":0} Example: label:"+1", action:"increment", payload:{{"var":"count","op":"inc","initial":0}}
2. set_state(key, value) persistent key-value store shown as live labels. 2. set_state(key, value) persistent key-value store shown as live labels.
Survives across turns. Use for tracking mode, progress, flags. Survives across turns. Use for tracking mode, progress, flags.
@ -253,9 +253,9 @@ You are one node in a pipeline: Input (perceives) -> You (reason) -> Output (spe
destroy_machine(id) remove machine from dashboard. destroy_machine(id) remove machine from dashboard.
Example navigation menu: Example navigation menu:
create_machine(id="nav", initial="main", states=[ create_machine(id="nav", initial="main", states=[
{"name":"main","buttons":[{"label":"Menu 1","action":"menu_1","go":"sub1"},{"label":"Menu 2","action":"menu_2","go":"sub2"}],"content":["Welcome"]}, {{"name":"main","buttons":[{{"label":"Menu 1","action":"menu_1","go":"sub1"}},{{"label":"Menu 2","action":"menu_2","go":"sub2"}}],"content":["Welcome"]}},
{"name":"sub1","buttons":[{"label":"Back","action":"back","go":"main"}],"content":["Sub 1 details"]}, {{"name":"sub1","buttons":[{{"label":"Back","action":"back","go":"main"}}],"content":["Sub 1 details"]}},
{"name":"sub2","buttons":[{"label":"Back","action":"back","go":"main"}],"content":["Sub 2 details"]} {{"name":"sub2","buttons":[{{"label":"Back","action":"back","go":"main"}}],"content":["Sub 2 details"]}}
]) ])
PREFER machines over emit_actions for anything with navigation or multiple views. PREFER machines over emit_actions for anything with navigation or multiple views.
ALWAYS include states when creating a machine. Never write code use the tool. ALWAYS include states when creating a machine. Never write code use the tool.
@ -350,10 +350,10 @@ conn.commit()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'") cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
tables = cursor.fetchall() tables = cursor.fetchall()
for t in tables: for t in tables:
cursor.execute(f"SELECT * FROM {t[0]}") cursor.execute(f"SELECT * FROM {{t[0]}}")
rows = cursor.fetchall() rows = cursor.fetchall()
cols = [d[0] for d in cursor.description] cols = [d[0] for d in cursor.description]
print(f"Table: {t[0]}") print(f"Table: {{t[0]}}")
print(" | ".join(cols)) print(" | ".join(cols))
for row in rows: for row in rows:
print(" | ".join(str(c) for c in row)) print(" | ".join(str(c) for c in row))
@ -446,7 +446,7 @@ conn.close()'''
await self.hud("thinking", detail="reasoning about response") await self.hud("thinking", detail="reasoning about response")
messages = [ messages = [
{"role": "system", "content": self.SYSTEM.replace("{memory_context}", memory_context)}, {"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
] ]
for msg in history[-12:]: for msg in history[-12:]:
messages.append(msg) messages.append(msg)

View File

@ -88,7 +88,7 @@ Rules:
hint += f"\nTool result:\n{tool_output[:500]}" hint += f"\nTool result:\n{tool_output[:500]}"
messages = [ messages = [
{"role": "system", "content": self.RESPONSE_SYSTEM.replace("{hint}", hint)}, {"role": "system", "content": self.RESPONSE_SYSTEM.format(hint=hint)},
] ]
for msg in history[-8:]: for msg in history[-8:]:
messages.append(msg) messages.append(msg)

View File

@ -2,10 +2,9 @@
import json import json
import logging import logging
import uuid
from .base import Node from .base import Node
from ..types import ThoughtResult, Artifact from ..types import ThoughtResult
log = logging.getLogger("runtime") log = logging.getLogger("runtime")
@ -17,7 +16,6 @@ class UINode(Node):
def __init__(self, send_hud): def __init__(self, send_hud):
super().__init__(send_hud) super().__init__(send_hud)
self.thinker_controls: list[dict] = [] # buttons, labels, tables from Thinker self.thinker_controls: list[dict] = [] # buttons, labels, tables from Thinker
self.artifacts: list[dict] = [] # typed workspace artifacts
self.state: dict = {} # {"count": 0, "theme": "dark", ...} self.state: dict = {} # {"count": 0, "theme": "dark", ...}
self.bindings: dict = {} # {"increment": {"op": "inc", "var": "count"}, ...} self.bindings: dict = {} # {"increment": {"op": "inc", "var": "count"}, ...}
self.machines: dict = {} # {"nav": {initial, states, current}, ...} self.machines: dict = {} # {"nav": {initial, states, current}, ...}
@ -81,7 +79,6 @@ class UINode(Node):
"initial": initial, "initial": initial,
"current": initial, "current": initial,
"states": states, "states": states,
"data": {}, # wizard field storage (e.g. {"bundesland": "Bayern"})
} }
log.info(f"[ui] machine created: {mid} (initial={initial}, {len(states)} states)") log.info(f"[ui] machine created: {mid} (initial={initial}, {len(states)} states)")
await self.hud("machine_created", id=mid, initial=initial, state_count=len(states)) await self.hud("machine_created", id=mid, initial=initial, state_count=len(states))
@ -107,28 +104,6 @@ class UINode(Node):
log.info(f"[ui] machine reset: {mid} -> {initial}") log.info(f"[ui] machine reset: {mid} -> {initial}")
await self.hud("machine_reset", id=mid, state=initial) await self.hud("machine_reset", id=mid, state=initial)
elif op == "update_data":
if mid not in self.machines:
log.warning(f"[ui] update_data: machine '{mid}' not found")
continue
data_update = op_data.get("data", {})
self.machines[mid]["data"].update(data_update)
log.info(f"[ui] machine data updated: {mid} += {data_update}")
await self.hud("machine_data_updated", id=mid, data=data_update)
elif op == "transition":
if mid not in self.machines:
log.warning(f"[ui] transition: machine '{mid}' not found")
continue
target = op_data.get("target", "")
if target in self.machines[mid]["states"]:
old = self.machines[mid]["current"]
self.machines[mid]["current"] = target
log.info(f"[ui] machine transition (expert): {mid} {old} -> {target}")
await self.hud("machine_transitioned", id=mid, old=old, target=target)
else:
log.warning(f"[ui] transition target '{target}' not found in {mid}")
elif op == "destroy": elif op == "destroy":
if mid in self.machines: if mid in self.machines:
del self.machines[mid] del self.machines[mid]
@ -182,31 +157,15 @@ class UINode(Node):
return controls return controls
def get_machine_summary(self) -> str: def get_machine_summary(self) -> str:
"""Rich summary for PA/Thinker context — includes current state details and stored data.""" """Summary for Thinker context — shape only, not full data."""
if not self.machines: if not self.machines:
return "" return ""
parts = [] parts = []
for mid, m in self.machines.items(): for mid, m in self.machines.items():
current = m["current"] current = m["current"]
state_names = list(m["states"].keys()) state_names = list(m["states"].keys())
state_def = m["states"].get(current, {}) parts.append(f" machine '{mid}': state={current}, states={state_names}")
line = f" machine '{mid}': state={current}, states={state_names}" return "Machines:\n" + "\n".join(parts)
# Current state content
content = state_def.get("content", [])
if content:
line += f", content={content}"
# Current state buttons
buttons = state_def.get("buttons", [])
if buttons:
btn_labels = [b.get("label", b.get("action", "?")) for b in buttons if isinstance(b, dict)]
if btn_labels:
line += f", buttons={btn_labels}"
# Stored wizard data
data = m.get("data", {})
if data:
line += f", data={data}"
parts.append(line)
return "Active machines (interactive wizard/workflow state):\n" + "\n".join(parts)
# --- State operations --- # --- State operations ---
@ -347,17 +306,12 @@ class UINode(Node):
"value": str(value), "value": str(value),
}) })
# 4. Add display items (cards, lists, or simple display) # 4. Add display items from Thinker's emit_display() calls
if thought.display_items: if thought.display_items:
for item in thought.display_items: for item in thought.display_items:
item_type = item.get("type", "text")
if item_type in ("card", "list"):
# Pass through structured components as-is
controls.append(item)
else:
controls.append({ controls.append({
"type": "display", "type": "display",
"display_type": item_type, "display_type": item.get("type", "text"),
"label": item.get("label", ""), "label": item.get("label", ""),
"value": item.get("value", ""), "value": item.get("value", ""),
"style": item.get("style", ""), "style": item.get("style", ""),
@ -384,155 +338,21 @@ class UINode(Node):
return controls return controls
def _build_artifacts(self, thought: ThoughtResult) -> list[dict]:
    """Assemble the typed artifact list for one ThoughtResult.

    Artifacts are emitted in a fixed order: the thought's own artifacts,
    one artifact per display item, an action bar, a data table extracted
    from tool output, one status artifact per state variable, and one
    machine artifact per registered machine.

    Side effects: assigns an ``id`` to incoming artifacts that lack one and
    applies ``thought.state_updates`` through ``self.set_var``.
    """

    def short_id() -> str:
        # Random 8-character id for artifacts without a stable identity.
        return str(uuid.uuid4())[:8]

    def action_bar(buttons) -> dict:
        # The action bar always uses the fixed id "action_bar".
        return {
            "id": "action_bar",
            "type": "action_bar",
            "data": {},
            "actions": [{"label": btn["label"], "action": btn["action"],
                         "payload": btn.get("payload", {})} for btn in buttons],
            "meta": {},
        }

    result = []

    # 1. Artifacts supplied directly on the thought (id filled in if absent).
    for artifact in thought.artifacts or ():
        if not artifact.get("id"):
            artifact["id"] = short_id()
        result.append(artifact)

    # 2. Display items: cards and lists become entity_detail, the rest status.
    for item in thought.display_items or ():
        kind = item.get("type", "text")
        if kind == "card":
            art_type = "entity_detail"
            data = {
                "title": item.get("title", ""),
                "subtitle": item.get("subtitle", ""),
                "fields": item.get("fields", []),
            }
            actions = item.get("actions", [])
            meta = {}
        elif kind == "list":
            art_type = "entity_detail"
            data = {
                "title": item.get("title", ""),
                "items": item.get("items", []),
            }
            actions = []
            meta = {"list": True}
        else:
            art_type = "status"
            data = {
                "display_type": kind,
                "label": item.get("label", ""),
                "value": item.get("value", ""),
                "style": item.get("style", ""),
            }
            actions = []
            meta = {}
        result.append({
            "id": short_id(),
            "type": art_type,
            "data": data,
            "actions": actions,
            "meta": meta,
        })

    # 3. Action bar: fresh actions win; otherwise keep already-known buttons.
    if thought.actions:
        result.append(action_bar(self._parse_thinker_actions(thought.actions)))
    elif self.thinker_controls:
        kept_buttons = [ctrl for ctrl in self.thinker_controls
                        if ctrl.get("type") == "button"]
        if kept_buttons:
            result.append(action_bar(kept_buttons))

    # 4. Tool output that parses as a table becomes a data_table artifact.
    table = self._extract_table(thought.tool_output) if thought.tool_output else None
    if table:
        result.append({
            "id": short_id(),
            "type": "data_table",
            "data": {
                "columns": table["columns"],
                "rows": table["data"],
            },
            "actions": [],
            "meta": {"source": thought.tool_used or "query_db"},
        })

    # 5. Apply state updates, then expose every state variable as a status.
    # NOTE(review): assumes all state variables are listed even when this
    # thought carries no updates — confirm against the original nesting.
    for key, value in (thought.state_updates or {}).items():
        self.set_var(key, value)
    result.extend(
        {
            "id": f"state_{name}",
            "type": "status",
            "data": {"label": name, "value": str(value), "display_type": "text"},
            "actions": [],
            "meta": {"state_var": True},
        }
        for name, value in self.state.items()
    )

    # 6. One artifact per machine, describing its current state.
    for machine_id, machine in self.machines.items():
        current = machine["current"]
        current_def = machine["states"].get(current, {})
        buttons = [
            {"label": btn.get("label", ""), "action": btn.get("action", ""),
             "go": btn.get("go", "")}
            for btn in current_def.get("buttons", []) if isinstance(btn, dict)
        ]
        result.append({
            "id": f"machine_{machine_id}",
            "type": "machine",
            "data": {
                "machine_id": machine_id,
                "current": current,
                "states": list(machine["states"].keys()),
                "content": current_def.get("content", []),
                "stored_data": machine.get("data", {}),
            },
            "actions": buttons,
            "meta": {"live": True},
        })

    return result
def get_artifacts(self) -> list[dict]:
    """Return the artifact list currently stored on this node.

    The stored list object itself is returned, not a copy.
    """
    return self.artifacts
async def process(self, thought: ThoughtResult, history: list[dict], async def process(self, thought: ThoughtResult, history: list[dict],
memory_context: str = "") -> list[dict]: memory_context: str = "") -> list[dict]:
# Apply machine ops first (create/add_state/reset/destroy) # Apply machine ops first (create/add_state/reset/destroy)
if thought.machine_ops: if thought.machine_ops:
await self.apply_machine_ops(thought.machine_ops) await self.apply_machine_ops(thought.machine_ops)
# Build artifacts (new system)
self.artifacts = self._build_artifacts(thought)
# Build legacy controls (backward compat)
thinker_ctrls = self._build_controls(thought) thinker_ctrls = self._build_controls(thought)
if thinker_ctrls: if thinker_ctrls:
self.thinker_controls = thinker_ctrls self.thinker_controls = thinker_ctrls
# Always emit the merged view (thinker + machine) # Always emit the merged view (thinker + machine)
merged = self.current_controls merged = self.current_controls
if merged or self.artifacts: if merged:
await self.hud("controls", controls=merged) await self.hud("controls", controls=merged)
log.info(f"[ui] emitting {len(merged)} controls + {len(self.artifacts)} artifacts") log.info(f"[ui] emitting {len(merged)} controls ({len(self.thinker_controls)} thinker + {len(self.get_machine_controls())} machine)")
else: else:
await self.hud("decided", instruction="no new controls") await self.hud("decided", instruction="no new controls")

View File

@ -17,7 +17,7 @@ log = logging.getLogger("runtime")
TRACE_FILE = Path(__file__).parent.parent / "trace.jsonl" TRACE_FILE = Path(__file__).parent.parent / "trace.jsonl"
# Default graph — can be switched at runtime # Default graph — can be switched at runtime
_active_graph_name = "v4-eras" _active_graph_name = "v1-current"
class OutputSink: class OutputSink:
@ -56,13 +56,6 @@ class OutputSink:
except Exception: except Exception:
pass pass
async def send_artifacts(self, artifacts: list):
    """Send the artifact list over the attached websocket, if there is one.

    The message is the JSON text of
    ``{"type": "artifacts", "artifacts": artifacts}``. Delivery is
    best-effort: any failure while serializing or sending is logged at
    debug level and never raised to the caller.
    """
    if not self.ws:
        return
    try:
        payload = json.dumps({"type": "artifacts", "artifacts": artifacts})
        await self.ws.send_text(payload)
    except Exception as exc:
        # Still best-effort, but leave a trace so an unserializable artifact
        # or a dropped socket can be diagnosed instead of vanishing silently.
        log.debug("[sink] send_artifacts failed: %s", exc)
async def send_hud(self, data: dict): async def send_hud(self, data: dict):
if self.ws: if self.ws:
try: try:
@ -228,9 +221,8 @@ class Runtime:
self.history.append({"role": "user", "content": action_desc}) self.history.append({"role": "user", "content": action_desc})
sensor_lines = self.sensor.get_context_lines() sensor_lines = self.sensor.get_context_lines()
director_line = self.director.get_context_line() if self.director else "" director_line = self.director.get_context_line()
mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines, ui_state=self.ui_node.state) mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines, ui_state=self.ui_node.state)
if director_line:
mem_ctx += f"\n\n{director_line}" mem_ctx += f"\n\n{director_line}"
command = Command( command = Command(
@ -250,7 +242,7 @@ class Runtime:
self.history.append({"role": "assistant", "content": response}) self.history.append({"role": "assistant", "content": response})
await self.memorizer.update(self.history) await self.memorizer.update(self.history)
if not self.is_v2 and self.director: if not self.is_v2:
await self.director.update(self.history, self.memorizer.state) await self.director.update(self.history, self.memorizer.state)
if len(self.history) > self.MAX_HISTORY: if len(self.history) > self.MAX_HISTORY:
@ -327,9 +319,8 @@ class Runtime:
# Check Sensor flags (idle return, workspace mismatch) # Check Sensor flags (idle return, workspace mismatch)
sensor_flags = self.sensor.consume_flags() sensor_flags = self.sensor.consume_flags()
sensor_lines = self.sensor.get_context_lines() sensor_lines = self.sensor.get_context_lines()
director_line = self.director.get_context_line() if self.director else "" director_line = self.director.get_context_line()
mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines, ui_state=self.ui_node.state) mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines, ui_state=self.ui_node.state)
if director_line:
mem_ctx += f"\n\n{director_line}" mem_ctx += f"\n\n{director_line}"
machine_summary = self.ui_node.get_machine_summary() machine_summary = self.ui_node.get_machine_summary()
if machine_summary: if machine_summary:

View File

@ -76,19 +76,6 @@ class PARouting:
language: str = "de" # Response language language: str = "de" # Response language
@dataclass
class Artifact:
    """A typed workspace item; the unit of workspace content.

    Fields, in constructor order:
      id      -- unique ID
      type    -- entity_detail | data_table | document_page | action_bar | status
      data    -- type-specific payload
      actions -- [{label, action, payload?}]
      meta    -- {entity?, related?, source_query?}
    """

    id: str
    type: str
    data: dict = field(default_factory=dict)
    actions: list = field(default_factory=list)
    meta: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the artifact as a plain dict (recursively converted)."""
        return asdict(self)
@dataclass @dataclass
class ThoughtResult: class ThoughtResult:
"""Thinker node's output — either a direct answer or tool results.""" """Thinker node's output — either a direct answer or tool results."""
@ -99,5 +86,3 @@ class ThoughtResult:
state_updates: dict = field(default_factory=dict) # {key: value} from set_state state_updates: dict = field(default_factory=dict) # {key: value} from set_state
display_items: list = field(default_factory=list) # [{type, label, value?, style?}] from emit_display display_items: list = field(default_factory=list) # [{type, label, value?, style?}] from emit_display
machine_ops: list = field(default_factory=list) # [{op, id, ...}] from machine tools machine_ops: list = field(default_factory=list) # [{op, id, ...}] from machine tools
errors: list = field(default_factory=list) # [{query, error, describe?}] from failed retries
artifacts: list = field(default_factory=list) # [Artifact] from emit_artifact

View File

@ -255,24 +255,14 @@ def check_actions(actions: list, check: str) -> tuple[bool, str]:
return True, f"{len(actions)} actions >= {expected}" return True, f"{len(actions)} actions >= {expected}"
return False, f"{len(actions)} actions < {expected}" return False, f"{len(actions)} actions < {expected}"
# has TYPE or has TYPE1 or TYPE2 # has table
m = re.match(r'has\s+(.+)', check) if check.strip() == "has table":
if m:
types = [t.strip() for t in m.group(1).split(" or has ")]
# Also handle "card or has table" → ["card", "table"]
types = [t.replace("has ", "") for t in types]
for a in actions: for a in actions:
if isinstance(a, dict) and a.get("type") in types: if isinstance(a, dict) and a.get("type") == "table":
atype = a.get("type") cols = a.get("columns", [])
if atype == "table": rows = len(a.get("data", []))
return True, f"table found: {len(a.get('columns', []))} cols, {len(a.get('data', []))} rows" return True, f"table found: {len(cols)} cols, {rows} rows"
elif atype == "card": return False, f"no table in {len(actions)} controls"
return True, f"card found: {a.get('title', '?')}, {len(a.get('fields', []))} fields"
elif atype == "list":
return True, f"list found: {a.get('title', '?')}, {len(a.get('items', []))} items"
else:
return True, f"{atype} found"
return False, f"no {' or '.join(types)} in {len(actions)} controls ({[a.get('type','?') for a in actions if isinstance(a, dict)]})"
# any action contains "foo" or "bar" — searches buttons only # any action contains "foo" or "bar" — searches buttons only
m = re.match(r'any action contains\s+"?(.+?)"?\s*$', check) m = re.match(r'any action contains\s+"?(.+?)"?\s*$', check)
@ -382,12 +372,6 @@ def check_trace(trace: list, check: str) -> tuple[bool, str]:
return True, f"found reset_machine via machine_reset event" return True, f"found reset_machine via machine_reset event"
if t.get("event") == "machine_destroyed" and tool_name == "destroy_machine": if t.get("event") == "machine_destroyed" and tool_name == "destroy_machine":
return True, f"found destroy_machine via machine_destroyed event" return True, f"found destroy_machine via machine_destroyed event"
if t.get("event") == "machine_data_updated" and tool_name == "update_machine":
return True, f"found update_machine via machine_data_updated event"
if t.get("event") == "machine_transitioned" and tool_name == "transition_machine":
return True, f"found transition_machine via machine_transitioned event"
if t.get("event") == "pa_retry" and tool_name == "pa_retry":
return True, f"found pa_retry event"
return False, f"no tool_call '{tool_name}' in trace" return False, f"no tool_call '{tool_name}' in trace"
# machine_created id="NAV" — checks for specific machine creation # machine_created id="NAV" — checks for specific machine creation

View File

@ -953,24 +953,6 @@ function send() {
inputEl.value = ''; inputEl.value = '';
} }
/**
 * Ask the server to clear the session, then wipe the local chat, trace,
 * dashboard and dock. The local UI is only cleared after the server confirms;
 * a network error or non-2xx response is reported in the trace instead.
 */
async function clearSession() {
  try {
    const headers = { 'Content-Type': 'application/json' };
    if (authToken) headers['Authorization'] = 'Bearer ' + authToken;
    const resp = await fetch('/api/clear', { method: 'POST', headers });
    // fetch() resolves on HTTP error statuses, so without this check a
    // rejected clear (e.g. 401/500) would still wipe the UI and log success.
    if (!resp.ok) throw new Error('HTTP ' + resp.status);
    // Clear UI
    msgs.innerHTML = '';
    traceEl.innerHTML = '';
    _currentDashboard = [];
    currentEl = null;
    const dock = document.getElementById('dock');
    if (dock) dock.innerHTML = '';
    addTrace('runtime', 'cleared', 'session reset');
  } catch (e) {
    addTrace('runtime', 'error', 'clear failed: ' + e);
  }
}
// --- Awareness panel updates --- // --- Awareness panel updates ---
let _sensorReadings = {}; let _sensorReadings = {};

View File

@ -16,8 +16,6 @@
<h1>cog</h1> <h1>cog</h1>
<div id="test-status"></div> <div id="test-status"></div>
<div style="flex:1"></div> <div style="flex:1"></div>
<div id="graph-switcher"></div>
<button onclick="clearSession()" class="btn-top" title="Clear session">Clear</button>
<div id="status">disconnected</div> <div id="status">disconnected</div>
</div> </div>
@ -29,7 +27,17 @@
</div> </div>
<div class="panel detail-panel"> <div class="panel detail-panel">
<div class="panel-header detail-h">Nodes</div> <div class="panel-header detail-h">Nodes</div>
<div id="node-metrics"></div> <div id="node-metrics">
<div class="node-meter" id="meter-input"><span class="nm-label">input</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-director_v2"><span class="nm-label">director</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-pa_v1"><span class="nm-label">PA</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-thinker"><span class="nm-label">thinker</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-eras_expert"><span class="nm-label">eras</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-output"><span class="nm-label">output</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-memorizer"><span class="nm-label">memo</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-interpreter"><span class="nm-label">interp</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-sensor"><span class="nm-label">sensor</span><span class="nm-text" style="flex:1"></span></div>
</div>
</div> </div>
<div class="panel graph-panel"> <div class="panel graph-panel">
<div class="panel-header graph-h">Graph <div class="panel-header graph-h">Graph
@ -50,6 +58,7 @@
<div id="input-bar"> <div id="input-bar">
<input id="input" placeholder="Type a message..." autocomplete="off"> <input id="input" placeholder="Type a message..." autocomplete="off">
<button onclick="send()">Send</button> <button onclick="send()">Send</button>
<button onclick="clearSession()" class="btn-clear" title="Clear session">&#x2715;</button>
</div> </div>
</div> </div>
<div class="panel awareness-panel"> <div class="panel awareness-panel">

View File

@ -1,178 +1,15 @@
/** Awareness panel: memorizer state, sensor readings. /** Awareness panel: memorizer state, sensor readings, node meters. */
* Node detail panel: per-node model, tokens, progress, last event.
*/
import { esc, truncate } from './util.js'; import { esc, truncate } from './util.js';
let _sensorReadings = {}; let _sensorReadings = {};
// --- Node state tracker ---
const _nodeState = {}; // { nodeName: { model, tokens, maxTokens, fillPct, lastEvent, lastDetail, status, toolCalls, startedAt } }
// Normalize node names so one node gets one state entry regardless of which
// alias reports it: pa_v1 → pa, director_v2 → director,
// expert_eras / eras_expert → eras.
function _normName(name) {
  return String(name ?? '')
    // Version suffix: only at the end, so "x_v10" is not mangled into "x0".
    .replace(/_v\d+$/, '')
    // "expert" marker may sit on either side of the domain name.
    .replace(/^expert_|_expert$/, '');
}
// Look up the state record for a node (by normalized name), creating an
// idle, zeroed record the first time the node is seen.
function _getNode(name) {
  const id = _normName(name);
  const known = _nodeState[id];
  if (known) return known;

  const fresh = {
    model: '',
    tokens: 0,
    maxTokens: 0,
    fillPct: 0,
    lastEvent: '',
    lastDetail: '',
    status: 'idle',
    toolCalls: 0,
    lastTool: '',
  };
  _nodeState[id] = fresh;
  return fresh;
}
/**
 * Fold one HUD event into the per-node state and re-render the node cards.
 *
 * @param {string} node   HUD node name (normalized by _getNode).
 * @param {string} event  HUD event name; unknown events only trigger a re-render.
 * @param {object} [data] Event payload. May be missing; treated as empty.
 */
export function updateNodeFromHud(node, event, data) {
  const n = _getNode(node);
  // Events without a payload must not throw on property access.
  const d = data || {};

  switch (event) {
    case 'context':
      if (d.model) n.model = d.model.replace('google/', '').replace('anthropic/', '');
      if (d.tokens !== undefined) n.tokens = d.tokens;
      if (d.max_tokens !== undefined) n.maxTokens = d.max_tokens;
      if (d.fill_pct !== undefined) n.fillPct = d.fill_pct;
      break;
    case 'thinking':
      n.status = 'thinking';
      n.lastEvent = 'thinking';
      n.lastDetail = d.detail || '';
      break;
    case 'perceived': {
      const a = d.analysis || {};
      n.status = 'done';
      n.lastEvent = 'perceived';
      n.lastDetail = `${a.intent || '?'}/${a.language || '?'}/${a.tone || '?'}`;
      break;
    }
    case 'decided':
    case 'routed':
      n.status = 'done';
      n.lastEvent = event;
      n.lastDetail = d.goal || d.instruction || d.job || '';
      break;
    case 'tool_call':
      n.status = 'tool';
      n.lastEvent = 'tool_call';
      n.lastTool = d.tool || '';
      n.lastDetail = d.tool || '';
      n.toolCalls++;
      break;
    case 'tool_result':
      // Status is intentionally left as-is; only the last event/detail change.
      n.lastEvent = 'tool_result';
      n.lastDetail = truncate(d.output || '', 50);
      break;
    case 'streaming':
      n.status = 'streaming';
      n.lastEvent = 'streaming';
      break;
    case 'done':
      n.status = 'done';
      n.lastEvent = 'done';
      break;
    case 'updated':
      n.status = 'done';
      n.lastEvent = 'updated';
      break;
    case 'planned':
      n.status = 'planned';
      n.lastEvent = 'planned';
      n.lastDetail = `${d.tools || 0} tools`;
      break;
    case 'interpreted':
      n.status = 'done';
      n.lastEvent = 'interpreted';
      n.lastDetail = truncate(d.summary || '', 50);
      break;
    default:
      break;
  }

  renderNodes();
}
// Fixed pipeline order, expressed in normalized node names. renderNodes()
// sorts cards by position in this list; names not listed sort after all of them.
const PIPELINE_ORDER = ['input', 'pa', 'director', 'eras', 'plankiste',
'thinker', 'interpreter', 'output', 'memorizer', 'ui', 'sensor'];
/**
 * Re-render every node card into #node-metrics from _nodeState.
 * Cards follow PIPELINE_ORDER; 'runtime' and 'frame_engine' are never shown.
 */
function renderNodes() {
  const el = document.getElementById('node-metrics');
  if (!el) return;

  // Unknown nodes get rank 99 so they sort after every known pipeline stage.
  const rank = (name) => {
    const i = PIPELINE_ORDER.indexOf(name);
    return i === -1 ? 99 : i;
  };

  const cards = Object.entries(_nodeState)
    .filter(([name]) => name !== 'runtime' && name !== 'frame_engine')
    .sort((a, b) => rank(a[0]) - rank(b[0]))
    .map(([name, n]) => {
      const statusClass = n.status === 'thinking' || n.status === 'tool' ? 'nm-active'
        : n.status === 'streaming' ? 'nm-streaming' : '';
      const shortName = _normName(name);
      const modelShort = n.model ? n.model.split('/').pop().replace('-001', '').replace('-4.5', '4.5') : '';
      const tokenStr = n.maxTokens ? `${n.tokens}/${n.maxTokens}t` : '';
      // fillPct comes from HUD payloads and lands inside a style attribute set
      // via innerHTML: coerce to a number and clamp so it can neither break
      // out of the attribute nor overflow the bar.
      const pct = Number(n.fillPct);
      const fillW = Number.isFinite(pct) ? Math.min(100, Math.max(0, pct)) : 0;
      const detail = n.lastDetail ? truncate(n.lastDetail, 45) : '';
      const toolStr = n.toolCalls > 0 ? ` [${n.toolCalls} calls]` : '';
      return `<div class="node-card ${statusClass}">
<div class="nc-header">
<span class="nc-name">${esc(shortName)}</span>
<span class="nc-model">${esc(modelShort)}</span>
<span class="nc-tokens">${esc(tokenStr)}</span>
</div>
<div class="nc-bar"><div class="nc-fill" style="width:${fillW}%"></div></div>
<div class="nc-status">
<span class="nc-event">${esc(n.lastEvent)}</span>
<span class="nc-detail">${esc(detail)}${esc(toolStr)}</span>
</div>
</div>`;
    });

  el.innerHTML = cards.join('');
}
/**
 * Seed the node cards from a graph definition so they show up (idle) before
 * any HUD event arrives. Reads `graphData.nodes` (role → implementation) and
 * the optional `graphData.node_details` (role → {model, max_tokens}).
 */
export function initNodesFromGraph(graphData) {
  const roles = Object.keys(graphData.nodes || {});
  const details = graphData.node_details || {};

  roles.forEach((role) => {
    const card = _getNode(role);
    const info = details[role];
    if (info) {
      card.model = (info.model || '').replace('google/', '').replace('anthropic/', '');
      card.maxTokens = info.max_tokens || 0;
    }
    card.lastEvent = 'idle';
    card.status = 'idle';
  });

  renderNodes();
}
/** Forget all tracked node state and empty the #node-metrics panel. */
export function clearNodes() {
  Object.keys(_nodeState).forEach((key) => {
    delete _nodeState[key];
  });
  const panel = document.getElementById('node-metrics');
  if (panel) {
    panel.innerHTML = '';
  }
}
// Legacy meter entry point, kept so existing callers (ws.js) keep working:
// stores the token figures on the node's state record and re-renders the cards.
export function updateMeter(node, tokens, maxTokens, fillPct) {
  const record = _getNode(node);
  Object.assign(record, { tokens, maxTokens, fillPct });
  renderNodes();
}
// --- Awareness: memorizer state ---
export function updateAwarenessState(state) { export function updateAwarenessState(state) {
const body = document.getElementById('aw-state-body'); const body = document.getElementById('aw-state-body');
if (!body) return; if (!body) return;
const expectation = state.user_expectation || 'conversational';
const expClass = {
conversational: 'aw-exp-conv',
delegated: 'aw-exp-deleg',
waiting_input: 'aw-exp-wait',
observing: 'aw-exp-obs',
}[expectation] || '';
const display = [ const display = [
['user', state.user_name], ['user', state.user_name],
['mood', state.user_mood], ['mood', state.user_mood],
['expectation', expectation, expClass],
['topic', state.topic], ['topic', state.topic],
['lang', state.language], ['lang', state.language],
['style', state.style_hint], ['style', state.style_hint],
@ -181,8 +18,8 @@ export function updateAwarenessState(state) {
const facts = state.facts || []; const facts = state.facts || [];
const history = state.topic_history || []; const history = state.topic_history || [];
let html = display.map(([k, v, cls]) => let html = display.map(([k, v]) =>
`<div class="aw-row"><span class="aw-key">${esc(k)}</span><span class="aw-val ${cls || ''}">${esc(v || 'null')}</span></div>` `<div class="aw-row"><span class="aw-key">${esc(k)}</span><span class="aw-val">${esc(v || 'null')}</span></div>`
).join(''); ).join('');
if (facts.length) { if (facts.length) {
@ -196,8 +33,6 @@ export function updateAwarenessState(state) {
body.innerHTML = html; body.innerHTML = html;
} }
// --- Awareness: sensor readings ---
export function updateAwarenessSensors(tick, deltas) { export function updateAwarenessSensors(tick, deltas) {
const body = document.getElementById('aw-sensor-body'); const body = document.getElementById('aw-sensor-body');
if (!body) return; if (!body) return;
@ -211,3 +46,12 @@ export function updateAwarenessSensors(tick, deltas) {
} }
body.innerHTML = html; body.innerHTML = html;
} }
/**
 * Update the static meter element `#meter-<node>`: bar width from fillPct and
 * the "<tokens>/<maxTokens>t" caption. Silently ignores unknown nodes.
 */
export function updateMeter(node, tokens, maxTokens, fillPct) {
  const meter = document.getElementById('meter-' + node);
  if (!meter) {
    return;
  }
  const bar = meter.querySelector('.nm-bar');
  const caption = meter.querySelector('.nm-text');
  // NOTE(review): the width is applied to .nm-bar, not the inner .nm-fill —
  // confirm against the stylesheet that this is the intended element.
  if (bar) {
    bar.style.width = fillPct + '%';
  }
  if (caption) {
    caption.textContent = `${tokens}/${maxTokens}t`;
  }
}

View File

@ -1,9 +1,6 @@
/** Dashboard: workspace artifact + control rendering. /** Dashboard: workspace controls rendering (buttons, tables, labels, displays, machines). */
* Artifact system: typed artifacts (entity_detail, data_table, document_page, action_bar, status, machine).
* Legacy: dockControls() still works as fallback for old control format.
*/
import { esc, renderMarkdown } from './util.js'; import { esc } from './util.js';
import { addTrace } from './trace.js'; import { addTrace } from './trace.js';
import { setDashboard } from './chat.js'; import { setDashboard } from './chat.js';
@ -11,233 +8,8 @@ let _ws = null;
export function setWs(ws) { _ws = ws; } export function setWs(ws) { _ws = ws; }
// Send a workspace action over the websocket and trace it. Dropped silently
// when no socket is set or the socket is not open (readyState !== 1).
function _sendAction(action, data) {
  const open = _ws && _ws.readyState === 1;
  if (!open) return;
  const message = { type: 'action', action, data: data || {} };
  _ws.send(JSON.stringify(message));
  addTrace('runtime', 'action', action);
}
// --- Artifact system ---
/**
 * Replace the workspace body with one wrapper element per artifact, rendered
 * by the matching entry in RENDERERS (JSON fallback for unknown types), and
 * publish the artifacts' actions as flat button controls via setDashboard().
 *
 * @param {Array<object>} artifacts Artifact dicts ({id, type, data, actions}).
 *   A missing/non-array value or null entries are tolerated and ignored.
 */
export function dockArtifacts(artifacts) {
  const body = document.getElementById('workspace-body');
  if (!body) return;
  const list = Array.isArray(artifacts) ? artifacts.filter(Boolean) : [];

  body.innerHTML = '';
  const container = document.createElement('div');
  container.className = 'artifacts-container';

  for (const art of list) {
    const wrapper = document.createElement('div');
    wrapper.className = 'ws-artifact ws-artifact-' + (art.type || 'unknown');
    wrapper.dataset.artifactId = art.id || '';

    // Own-property lookup only: a type such as "toString" or "constructor"
    // must fall through to the fallback instead of calling an inherited
    // Object.prototype function.
    const renderer = Object.prototype.hasOwnProperty.call(RENDERERS, art.type)
      ? RENDERERS[art.type]
      : null;
    if (renderer) {
      renderer(wrapper, art);
    } else {
      wrapper.innerHTML = '<div class="ws-artifact-fallback">' + esc(JSON.stringify(art.data || {})) + '</div>';
    }
    container.appendChild(wrapper);
  }
  body.appendChild(container);

  // Also set dashboard for S3* audit (flatten actions from artifacts)
  const flatControls = list.flatMap(a => (a.actions || []).map(act => ({type: 'button', ...act})));
  setDashboard(flatControls);
}
// --- Artifact renderers ---
// Dispatch table: artifact `type` → render function. Each renderer is called
// as renderer(wrapperElement, artifact) and fills the wrapper in place.
// dockArtifacts() falls back to a JSON dump for types not listed here.
const RENDERERS = {
entity_detail: renderEntityDetail,
data_table: renderDataTable,
document_page: renderDocumentPage,
action_bar: renderActionBar,
status: renderStatus,
machine: renderMachine,
};
/**
 * Render an entity_detail artifact into `el`: optional title/subtitle, an
 * optional list of nested item cards, the entity's own fields (a field with
 * an `action` becomes a clickable link) and the artifact's action buttons.
 * Click handlers are attached afterwards by _wireActions().
 */
function renderEntityDetail(el, art) {
  const d = art.data || {};
  const out = [];

  const keyCell = (f) => '<span class="ws-card-key">' + esc(f.label || '') + '</span>';
  const fieldRow = (f, valueHtml) => '<div class="ws-card-field">' + keyCell(f) + valueHtml + '</div>';
  const plainValue = (f) => '<span class="ws-card-val">' + esc(String(f.value ?? '')) + '</span>';
  const linkValue = (f) =>
    '<span class="ws-card-link" data-action="' + esc(f.action) + '">' + esc(String(f.value ?? '')) + '</span>';

  if (d.title) out.push('<div class="ws-card-title">' + esc(d.title) + '</div>');
  if (d.subtitle) out.push('<div class="ws-card-subtitle">' + esc(d.subtitle) + '</div>');

  // List mode: one nested card per item.
  if (d.items && d.items.length) {
    out.push('<div class="ws-list">');
    for (const item of d.items) {
      out.push('<div class="ws-card ws-card-nested">');
      if (item.title) out.push('<div class="ws-card-title">' + esc(item.title) + '</div>');
      if (item.fields) {
        out.push('<div class="ws-card-fields">');
        for (const f of item.fields) out.push(fieldRow(f, plainValue(f)));
        out.push('</div>');
      }
      out.push('</div>');
    }
    out.push('</div>');
  }

  // Fields of the single entity itself.
  if (d.fields && d.fields.length) {
    out.push('<div class="ws-card-fields">');
    for (const f of d.fields) {
      out.push(fieldRow(f, f.action ? linkValue(f) : plainValue(f)));
    }
    out.push('</div>');
  }

  // Action buttons.
  if (art.actions && art.actions.length) {
    out.push('<div class="ws-card-actions">');
    for (const a of art.actions) {
      out.push('<button class="control-btn ws-card-btn" data-action="' + esc(a.action || '') + '">' + esc(a.label || '') + '</button>');
    }
    out.push('</div>');
  }

  el.innerHTML = out.join('');
  _wireActions(el);
}
/**
 * Render a data_table artifact into `el` as an optional header line plus a
 * <table>. Rows come from `data.rows` (or `data.data`); each row is either an
 * array of cells or an object keyed by column name. When `data.columns` is
 * absent, columns are inferred from the keys of the first object row.
 */
function renderDataTable(el, art) {
  const d = art.data || {};
  if (d.title) {
    const title = document.createElement('div');
    title.className = 'ws-artifact-header';
    title.textContent = d.title;
    el.appendChild(title);
  }

  const table = document.createElement('table');
  table.className = 'control-table';

  // Resolve rows first so column inference looks at the rows actually
  // rendered (previously `data.data` rows never contributed columns).
  const rows = d.rows || d.data || [];
  const first = rows[0];
  // Only a plain object row names its columns; inferring from an array row
  // would produce meaningless "0", "1", … headers.
  const inferred = first && typeof first === 'object' && !Array.isArray(first)
    ? Object.keys(first)
    : [];
  const cols = d.columns || inferred;

  if (cols.length) {
    const thead = document.createElement('tr');
    for (const col of cols) {
      const th = document.createElement('th');
      th.textContent = col;
      thead.appendChild(th);
    }
    table.appendChild(thead);
  }

  for (const row of rows) {
    const tr = document.createElement('tr');
    if (Array.isArray(row)) {
      for (const cell of row) {
        const td = document.createElement('td');
        td.textContent = cell;
        tr.appendChild(td);
      }
    } else if (row && typeof row === 'object') {
      // `row &&` guards null, whose typeof is also 'object'.
      for (const col of cols) {
        const td = document.createElement('td');
        td.textContent = row[col] ?? '';
        tr.appendChild(td);
      }
    }
    table.appendChild(tr);
  }
  el.appendChild(table);
}
/**
 * Render a document_page artifact into `el`: optional title, then one block
 * per section (escaped heading, markdown-rendered content), then the
 * artifact's action buttons (e.g. PDF export), wired by _wireActions().
 */
function renderDocumentPage(el, art) {
  const d = art.data || {};
  const out = [];

  if (d.title) out.push('<div class="ws-doc-title">' + esc(d.title) + '</div>');

  for (const section of (d.sections || [])) {
    out.push('<div class="ws-doc-section">');
    if (section.heading) {
      out.push('<div class="ws-doc-heading">' + esc(section.heading) + '</div>');
    }
    if (section.content) {
      out.push('<div class="ws-doc-content">' + renderMarkdown(section.content) + '</div>');
    }
    out.push('</div>');
  }

  if (art.actions && art.actions.length) {
    out.push('<div class="ws-card-actions">');
    for (const a of art.actions) {
      out.push('<button class="control-btn ws-card-btn" data-action="' + esc(a.action || '') + '">' + esc(a.label || '') + '</button>');
    }
    out.push('</div>');
  }

  el.innerHTML = out.join('');
  _wireActions(el);
}
/**
 * Render an action_bar artifact: one button per action, each sending the
 * action together with its payload (empty object when none) on click.
 */
function renderActionBar(el, art) {
  const actions = art.actions || [];
  for (const entry of actions) {
    const button = document.createElement('button');
    button.className = 'control-btn';
    button.textContent = entry.label || '';
    button.onclick = () => {
      _sendAction(entry.action, entry.payload || {});
    };
    el.appendChild(button);
  }
}
/**
 * Render a status artifact into `el`. `data.display_type` selects the look:
 *   'progress' – label, bar and percentage (value clamped to 0..100),
 *   'info'     – info icon plus label,
 *   anything else – label plus optional value.
 */
function renderStatus(el, art) {
  const d = art.data || {};
  const dt = d.display_type || 'text';
  el.classList.add('display-' + dt);

  // All three variants treat a missing label as empty; previously only the
  // text variant did, and the others passed `undefined` straight to esc().
  const labelHtml = '<span class="cd-label">' + esc(d.label || '') + '</span>';

  if (dt === 'progress') {
    const pct = Math.min(100, Math.max(0, Number(d.value) || 0));
    el.innerHTML = labelHtml
      + '<div class="cd-bar"><div class="cd-fill" style="width:' + pct + '%"></div></div>'
      + '<span class="cd-pct">' + pct + '%</span>';
  } else if (dt === 'info') {
    el.innerHTML = '<span class="cd-icon">\u2139</span>' + labelHtml;
  } else {
    // A numeric 0 is a real value and must be shown; a bare truthiness test
    // would hide it.
    const hasValue = d.value || d.value === 0;
    el.innerHTML = labelHtml
      + (hasValue ? '<span class="cd-value">' + esc(String(d.value)) + '</span>' : '');
  }
}
/**
 * Render a machine artifact into `el`: header (machine id + current state),
 * content lines, stored key=value data when present, and action buttons,
 * which are wired by _wireActions().
 */
function renderMachine(el, art) {
  const d = art.data || {};
  const machineId = d.machine_id || '';
  const out = [];

  // Header
  out.push('<div class="ws-machine-header"><span class="ws-machine-name">' + esc(machineId) + '</span>'
    + '<span class="ws-machine-state">' + esc(d.current || '') + '</span></div>');

  // Content
  for (const line of (d.content || [])) {
    out.push('<div class="ws-machine-content">' + esc(line) + '</div>');
  }

  // Stored data
  const pairs = Object.entries(d.stored_data || {});
  if (pairs.length) {
    out.push('<div class="ws-machine-data">');
    for (const [key, value] of pairs) {
      out.push('<span class="ws-machine-datum">' + esc(key) + '=' + esc(String(value)) + '</span>');
    }
    out.push('</div>');
  }

  // Buttons
  if (art.actions && art.actions.length) {
    out.push('<div class="ws-card-actions">');
    for (const a of art.actions) {
      out.push('<button class="control-btn ws-card-btn" data-action="' + esc(a.action || '') + '">' + esc(a.label || '') + '</button>');
    }
    out.push('</div>');
  }

  el.innerHTML = out.join('');
  _wireActions(el);
}
// --- Helpers ---
// Attach click handlers to every rendered link (.ws-card-link) and button
// (.ws-card-btn) inside `el`. Each click sends the element's data-action with
// an empty payload and does not bubble further.
function _wireActions(el) {
  for (const selector of ['.ws-card-link', '.ws-card-btn']) {
    for (const target of el.querySelectorAll(selector)) {
      target.onclick = (evt) => {
        evt.stopPropagation();
        _sendAction(target.dataset.action, {});
      };
    }
  }
}
// --- Legacy control rendering (backward compat) ---
export function dockControls(controls) { export function dockControls(controls) {
setDashboard(controls); setDashboard(controls); // S3*: remember what's rendered
const body = document.getElementById('workspace-body'); const body = document.getElementById('workspace-body');
if (!body) return; if (!body) return;
body.innerHTML = ''; body.innerHTML = '';
@ -249,7 +21,12 @@ export function dockControls(controls) {
const btn = document.createElement('button'); const btn = document.createElement('button');
btn.className = 'control-btn'; btn.className = 'control-btn';
btn.textContent = ctrl.label; btn.textContent = ctrl.label;
btn.onclick = () => _sendAction(ctrl.action, ctrl.payload || ctrl.data || {}); btn.onclick = () => {
if (_ws && _ws.readyState === 1) {
_ws.send(JSON.stringify({ type: 'action', action: ctrl.action, data: ctrl.payload || ctrl.data || {} }));
addTrace('runtime', 'action', ctrl.action);
}
};
container.appendChild(btn); container.appendChild(btn);
} else if (ctrl.type === 'table') { } else if (ctrl.type === 'table') {
const table = document.createElement('table'); const table = document.createElement('table');
@ -257,16 +34,22 @@ export function dockControls(controls) {
if (ctrl.columns) { if (ctrl.columns) {
const thead = document.createElement('tr'); const thead = document.createElement('tr');
for (const col of ctrl.columns) { for (const col of ctrl.columns) {
const th = document.createElement('th'); th.textContent = col; thead.appendChild(th); const th = document.createElement('th');
th.textContent = col;
thead.appendChild(th);
} }
table.appendChild(thead); table.appendChild(thead);
} }
for (const row of (ctrl.data || [])) { for (const row of (ctrl.data || [])) {
const tr = document.createElement('tr'); const tr = document.createElement('tr');
if (Array.isArray(row)) { if (Array.isArray(row)) {
for (const cell of row) { const td = document.createElement('td'); td.textContent = cell; tr.appendChild(td); } for (const cell of row) {
const td = document.createElement('td'); td.textContent = cell; tr.appendChild(td);
}
} else if (typeof row === 'object') { } else if (typeof row === 'object') {
for (const col of (ctrl.columns || Object.keys(row))) { const td = document.createElement('td'); td.textContent = row[col] ?? ''; tr.appendChild(td); } for (const col of (ctrl.columns || Object.keys(row))) {
const td = document.createElement('td'); td.textContent = row[col] ?? ''; tr.appendChild(td);
}
} }
table.appendChild(tr); table.appendChild(tr);
} }
@ -279,37 +62,21 @@ export function dockControls(controls) {
} else if (ctrl.type === 'display') { } else if (ctrl.type === 'display') {
const disp = document.createElement('div'); const disp = document.createElement('div');
const dt = ctrl.display_type || 'text'; const dt = ctrl.display_type || 'text';
disp.className = 'control-display display-' + dt; const style = ctrl.style ? ' display-' + ctrl.style : '';
disp.className = 'control-display display-' + dt + style;
if (dt === 'progress') { if (dt === 'progress') {
const pct = Math.min(100, Math.max(0, Number(ctrl.value) || 0)); const pct = Math.min(100, Math.max(0, Number(ctrl.value) || 0));
disp.innerHTML = '<span class="cd-label">' + esc(ctrl.label) + '</span><div class="cd-bar"><div class="cd-fill" style="width:' + pct + '%"></div></div><span class="cd-pct">' + pct + '%</span>'; disp.innerHTML = '<span class="cd-label">' + esc(ctrl.label) + '</span>'
+ '<div class="cd-bar"><div class="cd-fill" style="width:' + pct + '%"></div></div>'
+ '<span class="cd-pct">' + pct + '%</span>';
} else if (dt === 'status') {
disp.innerHTML = '<span class="cd-icon">' + (ctrl.style === 'success' ? '\u2713' : ctrl.style === 'error' ? '\u2717' : '\u2139') + '</span>'
+ '<span class="cd-label">' + esc(ctrl.label) + '</span>';
} else { } else {
disp.innerHTML = '<span class="cd-label">' + esc(ctrl.label) + '</span>' + (ctrl.value ? '<span class="cd-value">' + esc(String(ctrl.value)) + '</span>' : ''); disp.innerHTML = '<span class="cd-label">' + esc(ctrl.label) + '</span>'
+ (ctrl.value ? '<span class="cd-value">' + esc(String(ctrl.value)) + '</span>' : '');
} }
container.appendChild(disp); container.appendChild(disp);
} else if (ctrl.type === 'card') {
const card = document.createElement('div');
card.className = 'ws-card';
let html = '';
if (ctrl.title) html += '<div class="ws-card-title">' + esc(ctrl.title) + '</div>';
if (ctrl.subtitle) html += '<div class="ws-card-subtitle">' + esc(ctrl.subtitle) + '</div>';
if (ctrl.fields && ctrl.fields.length) {
html += '<div class="ws-card-fields">';
for (const f of ctrl.fields) {
html += '<div class="ws-card-field"><span class="ws-card-key">' + esc(f.label || '') + '</span><span class="ws-card-val">' + esc(String(f.value ?? '')) + '</span></div>';
}
html += '</div>';
}
if (ctrl.actions && ctrl.actions.length) {
html += '<div class="ws-card-actions">';
for (const a of ctrl.actions) {
html += '<button class="control-btn ws-card-btn" data-action="' + esc(a.action || '') + '">' + esc(a.label || '') + '</button>';
}
html += '</div>';
}
card.innerHTML = html;
_wireActions(card);
container.appendChild(card);
} }
} }
body.appendChild(container); body.appendChild(container);

View File

@ -1,12 +1,7 @@
/** Pipeline graph: Cytoscape visualization + animation. */ /** Pipeline graph: Cytoscape visualization + animation. */
import { initNodesFromGraph } from './awareness.js';
let cy = null; let cy = null;
let _dragEnabled = true; let _dragEnabled = true;
// Maps HUD node names → graph node IDs (built from graph definition)
// e.g. {"eras_expert": "expert_eras", "pa_v1": "pa", "thinker_v2": "thinker"}
let _nodeNameToId = {};
let _physicsRunning = false; let _physicsRunning = false;
let _physicsLayout = null; let _physicsLayout = null;
let _colaSpacing = 25; let _colaSpacing = 25;
@ -95,13 +90,6 @@ export async function initGraph() {
if (resp.ok) { if (resp.ok) {
const graph = await resp.json(); const graph = await resp.json();
graphElements = buildGraphElements(graph, mx, cw, mid, row1, row2); graphElements = buildGraphElements(graph, mx, cw, mid, row1, row2);
initNodesFromGraph(graph);
// Build HUD name → graph ID mapping: {impl_name: role}
_nodeNameToId = {};
for (const [role, impl] of Object.entries(graph.nodes || {})) {
_nodeNameToId[impl] = role; // "eras_expert" → "expert_eras"
_nodeNameToId[role] = role; // "expert_eras" → "expert_eras"
}
} }
} catch (e) {} } catch (e) {}
@ -161,24 +149,6 @@ export async function initGraph() {
}); });
} }
// --- Animation queue: batch rapid events, play sequentially ---
// Pending animation callbacks, run in FIFO order by _flushQueue().
const _animQueue = [];
// True while _flushQueue() is draining the queue; _enqueue() only starts a
// new drain when this is false.
let _animRunning = false;
const ANIM_INTERVAL = 200; // ms between queued animations
// Queue an animation callback; start draining unless a drain is already running.
function _enqueue(fn) {
  _animQueue.push(fn);
  if (_animRunning) return;
  _flushQueue();
}
function _flushQueue() {
if (!_animQueue.length) { _animRunning = false; return; }
_animRunning = true;
const fn = _animQueue.shift();
fn();
setTimeout(_flushQueue, ANIM_INTERVAL);
}
function pulseNode(id) { function pulseNode(id) {
if (!cy) return; if (!cy) return;
const node = cy.getElementById(id); const node = cy.getElementById(id);
@ -197,29 +167,29 @@ function flashEdge(sourceId, targetId) {
export function graphAnimate(event, node) { export function graphAnimate(event, node) {
if (!cy) return; if (!cy) return;
// Resolve HUD node name to graph ID (e.g. "eras_expert" → "expert_eras") if (node && cy.getElementById(node).length) pulseNode(node);
const graphId = _nodeNameToId[node] || node;
_enqueue(() => {
if (graphId && cy.getElementById(graphId).length) pulseNode(graphId);
switch (event) { switch (event) {
case 'perceived': pulseNode('input'); flashEdge('user', 'input'); break; case 'perceived': pulseNode('input'); flashEdge('user', 'input'); break;
case 'decided': case 'decided':
pulseNode(graphId); flashEdge(graphId, 'output'); if (node === 'director_v2' || node === 'director' || node === 'pa_v1') {
pulseNode(node); flashEdge(node, 'thinker');
} else {
pulseNode(node || 'thinker'); flashEdge('thinker', 'output');
}
break; break;
case 'routed': pulseNode(_nodeNameToId['pa_v1'] || 'pa'); break; case 'routed': pulseNode('pa'); break;
case 'reflex_path': pulseNode('input'); flashEdge('input', 'output'); break; case 'reflex_path': pulseNode('input'); flashEdge('input', 'output'); break;
case 'streaming': if (graphId === 'output') pulseNode('output'); break; case 'streaming': if (node === 'output') pulseNode('output'); break;
case 'controls': case 'machine_created': case 'machine_transition': case 'controls': case 'machine_created': case 'machine_transition':
pulseNode('ui'); break; pulseNode('ui'); break;
case 'updated': pulseNode('memorizer'); flashEdge('output', 'memorizer'); break; case 'updated': pulseNode('memorizer'); flashEdge('output', 'memorizer'); break;
case 'tool_call': pulseNode(graphId); break; case 'tool_call': pulseNode(node || 'thinker'); break;
case 'tool_result': pulseNode(graphId); break; case 'tool_result':
case 'thinking': pulseNode(graphId); break; if (cy.getElementById('interpreter').length) pulseNode('interpreter'); break;
case 'planned': pulseNode(graphId); break; case 'thinking': if (node) pulseNode(node); break;
case 'tick': pulseNode('sensor'); break; case 'tick': pulseNode('sensor'); break;
} }
}); // end _enqueue
} }
export function startPhysics() { export function startPhysics() {

View File

@ -4,7 +4,6 @@ import { initAuth, authToken, startLogin } from './auth.js';
import { initTrace, addTrace, clearTrace } from './trace.js'; import { initTrace, addTrace, clearTrace } from './trace.js';
import { initChat, clearChat } from './chat.js'; import { initChat, clearChat } from './chat.js';
import { clearDashboard } from './dashboard.js'; import { clearDashboard } from './dashboard.js';
import { clearNodes } from './awareness.js';
import { initGraph } from './graph.js'; import { initGraph } from './graph.js';
import { connect } from './ws.js'; import { connect } from './ws.js';
@ -13,13 +12,10 @@ window.addEventListener('load', async () => {
initTrace(); initTrace();
initChat(); initChat();
await initGraph(); await initGraph();
await initAuth(() => { await initAuth(() => connect());
connect();
loadGraphSwitcher();
});
}); });
// Clear session // Clear session button
window.clearSession = async () => { window.clearSession = async () => {
try { try {
const headers = { 'Content-Type': 'application/json' }; const headers = { 'Content-Type': 'application/json' };
@ -28,63 +24,11 @@ window.clearSession = async () => {
clearChat(); clearChat();
clearTrace(); clearTrace();
clearDashboard(); clearDashboard();
clearNodes();
addTrace('runtime', 'cleared', 'session reset'); addTrace('runtime', 'cleared', 'session reset');
} catch (e) { } catch (e) {
addTrace('runtime', 'error', 'clear failed: ' + e); addTrace('runtime', 'error', 'clear failed: ' + e);
} }
}; };
// Graph switcher — loads available graphs and shows buttons in top bar // Login button
async function loadGraphSwitcher() {
const container = document.getElementById('graph-switcher');
if (!container) { console.error('[main] no #graph-switcher'); return; }
try {
const headers = {};
if (authToken) headers['Authorization'] = 'Bearer ' + authToken;
const r = await fetch('/api/graph/list', { headers });
if (!r.ok) { console.error('[main] graph/list failed:', r.status); return; }
const data = await r.json();
const graphs = data.graphs || data || [];
console.log('[main] graphs:', graphs.length);
// Get current active graph
let activeGraph = '';
try {
const ar = await fetch('/api/graph/active', { headers });
if (ar.ok) {
const ag = await ar.json();
activeGraph = ag.name || '';
}
} catch (e) {}
container.innerHTML = graphs.map(g => {
const active = g.name === activeGraph;
return `<button class="btn-graph${active ? ' active' : ''}" onclick="switchGraph('${g.name}')" title="${g.description}">${g.name}</button>`;
}).join('');
} catch (e) {}
}
window.switchGraph = async (name) => {
try {
const headers = { 'Content-Type': 'application/json' };
if (authToken) headers['Authorization'] = 'Bearer ' + authToken;
await fetch('/api/graph/switch', {
method: 'POST', headers,
body: JSON.stringify({ name }),
});
addTrace('runtime', 'graph_switch', name);
clearChat();
clearTrace();
clearDashboard();
clearNodes();
addTrace('runtime', 'switched', `graph: ${name}`);
await initGraph();
loadGraphSwitcher();
} catch (e) {
addTrace('runtime', 'error', 'switch failed: ' + e);
}
};
// Login
window.startLogin = startLogin; window.startLogin = startLogin;

View File

@ -2,10 +2,10 @@
import { authToken, isAuthFailed, setAuthFailed, showLogin } from './auth.js'; import { authToken, isAuthFailed, setAuthFailed, showLogin } from './auth.js';
import { addTrace } from './trace.js'; import { addTrace } from './trace.js';
import { addMsg, handleDelta, handleDone, setWs as setChatWs } from './chat.js'; import { handleDelta, handleDone, setWs as setChatWs } from './chat.js';
import { dockControls, dockArtifacts, setWs as setDashWs } from './dashboard.js'; import { dockControls, setWs as setDashWs } from './dashboard.js';
import { graphAnimate } from './graph.js'; import { graphAnimate } from './graph.js';
import { updateMeter, updateNodeFromHud, updateAwarenessState, updateAwarenessSensors } from './awareness.js'; import { updateMeter, updateAwarenessState, updateAwarenessSensors } from './awareness.js';
import { updateTestStatus } from './tests.js'; import { updateTestStatus } from './tests.js';
import { truncate, esc } from './util.js'; import { truncate, esc } from './util.js';
@ -30,14 +30,12 @@ export function connect() {
setChatWs(ws); setChatWs(ws);
setDashWs(ws); setDashWs(ws);
connectDebugSockets(); connectDebugSockets();
restoreHistory();
}; };
ws.onerror = () => {}; ws.onerror = () => {};
ws.onclose = (e) => { ws.onclose = (e) => {
// 4001 = explicit auth rejection from server if (e.code === 4001 || e.code === 1006) {
if (e.code === 4001) {
setAuthFailed(true); setAuthFailed(true);
localStorage.removeItem('cog_token'); localStorage.removeItem('cog_token');
localStorage.removeItem('cog_access_token'); localStorage.removeItem('cog_access_token');
@ -46,10 +44,9 @@ export function connect() {
showLogin(); showLogin();
return; return;
} }
// 1006 = abnormal close (deploy, network), just reconnect document.getElementById('status').textContent = 'disconnected';
document.getElementById('status').textContent = 'reconnecting...'; document.getElementById('status').style.color = '#666';
document.getElementById('status').style.color = '#f59e0b'; addTrace('runtime', 'disconnected', 'ws closed');
addTrace('runtime', 'disconnected', `code ${e.code}, reconnecting...`);
setTimeout(connect, 2000); setTimeout(connect, 2000);
}; };
@ -61,8 +58,6 @@ export function connect() {
handleDelta(data.content); handleDelta(data.content);
} else if (data.type === 'done') { } else if (data.type === 'done') {
handleDone(); handleDone();
} else if (data.type === 'artifacts') {
dockArtifacts(data.artifacts);
} else if (data.type === 'controls') { } else if (data.type === 'controls') {
dockControls(data.controls); dockControls(data.controls);
} else if (data.type === 'cleared') { } else if (data.type === 'cleared') {
@ -71,31 +66,6 @@ export function connect() {
}; };
} }
async function restoreHistory() {
try {
const headers = {};
if (authToken) headers['Authorization'] = 'Bearer ' + authToken;
const r = await fetch('/api/history?last=20', { headers });
if (!r.ok) return;
const data = await r.json();
const messages = data.messages || [];
if (!messages.length) return;
// Only restore if chat is empty (fresh load)
if (document.getElementById('messages').children.length > 0) return;
for (const msg of messages) {
const el = addMsg(msg.role, '');
if (msg.role === 'assistant') {
// Render as markdown
const { renderMarkdown } = await import('./util.js');
el.innerHTML = renderMarkdown(msg.content || '');
} else {
el.textContent = msg.content || '';
}
}
addTrace('runtime', 'restored', `${messages.length} messages`);
} catch (e) {}
}
function connectDebugSockets() { function connectDebugSockets() {
const proto = location.protocol === 'https:' ? 'wss:' : 'ws:'; const proto = location.protocol === 'https:' ? 'wss:' : 'ws:';
const base = proto + '//' + location.host; const base = proto + '//' + location.host;
@ -153,7 +123,6 @@ function handleHud(data) {
const event = data.event || ''; const event = data.event || '';
graphAnimate(event, node); graphAnimate(event, node);
updateNodeFromHud(node, event, data);
if (event === 'context') { if (event === 'context') {
const count = (data.messages || []).length; const count = (data.messages || []).length;

View File

@ -10,16 +10,10 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0;
#test-status .ts-pass { color: #22c55e; } #test-status .ts-pass { color: #22c55e; }
#test-status .ts-fail { color: #ef4444; } #test-status .ts-fail { color: #ef4444; }
@keyframes pulse-text { 0%,100% { opacity: 1; } 50% { opacity: 0.5; } } @keyframes pulse-text { 0%,100% { opacity: 1; } 50% { opacity: 0.5; } }
.btn-top { padding: 0.2rem 0.6rem; font-size: 0.7rem; background: #333; }
.btn-top:hover { background: #ef4444; }
#graph-switcher { display: flex; gap: 3px; }
.btn-graph { padding: 0.2rem 0.5rem; font-size: 0.65rem; font-family: monospace; background: #1a1a1a; color: #888; border: 1px solid #333; border-radius: 3px; cursor: pointer; }
.btn-graph:hover { color: #fff; border-color: #2563eb; }
.btn-graph.active { color: #22c55e; border-color: #22c55e; background: #0a1e14; }
/* === Two-row layout === */ /* === Two-row layout === */
/* Middle row: workspace | node detail | graph */ /* Middle row: workspace | node detail | graph */
#middle-row { display: grid; grid-template-columns: 1fr 300px 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; } #middle-row { display: grid; grid-template-columns: 1fr 200px 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; }
/* Bottom row: chat | awareness | trace */ /* Bottom row: chat | awareness | trace */
#bottom-row { display: grid; grid-template-columns: 1fr 1fr 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; } #bottom-row { display: grid; grid-template-columns: 1fr 1fr 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; }
@ -42,19 +36,12 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0;
/* Node detail / metrics */ /* Node detail / metrics */
.detail-panel { display: flex; flex-direction: column; } .detail-panel { display: flex; flex-direction: column; }
#node-metrics { flex: 1; overflow-y: auto; padding: 0.3rem; display: flex; flex-direction: column; gap: 2px; } #node-metrics { flex: 1; overflow-y: auto; padding: 0.3rem; display: flex; flex-direction: column; gap: 1px; }
.node-card { background: #111; border-radius: 3px; padding: 0.25rem 0.4rem; border-left: 2px solid #333; } .node-meter { display: flex; align-items: center; gap: 0.3rem; padding: 0.2rem 0.4rem; background: #111; border-radius: 2px; }
.node-card.nm-active { border-left-color: #f59e0b; background: #1a1408; } .nm-label { font-size: 0.6rem; font-weight: 700; text-transform: uppercase; letter-spacing: 0.03em; min-width: 3.5rem; color: #888; }
.node-card.nm-streaming { border-left-color: #22c55e; background: #0a1e14; } .nm-bar { flex: 1; height: 5px; background: #1a1a1a; border-radius: 3px; overflow: hidden; }
.nc-header { display: flex; align-items: center; gap: 0.3rem; } .nm-fill { height: 100%; width: 0%; border-radius: 3px; transition: width 0.3s; background: #333; }
.nc-name { font-size: 0.65rem; font-weight: 700; text-transform: uppercase; color: #e0e0e0; min-width: 3rem; } .nm-text { font-size: 0.55rem; color: #555; min-width: 3rem; text-align: right; font-family: monospace; }
.nc-model { font-size: 0.55rem; color: #666; font-family: monospace; }
.nc-tokens { font-size: 0.55rem; color: #555; font-family: monospace; margin-left: auto; }
.nc-bar { height: 3px; background: #1a1a1a; border-radius: 2px; overflow: hidden; margin: 2px 0; }
.nc-fill { height: 100%; border-radius: 2px; background: #333; transition: width 0.3s; }
.nc-status { display: flex; gap: 0.3rem; align-items: baseline; }
.nc-event { font-size: 0.55rem; color: #888; font-family: monospace; }
.nc-detail { font-size: 0.55rem; color: #666; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
/* Graph panel */ /* Graph panel */
.graph-panel { display: flex; flex-direction: column; } .graph-panel { display: flex; flex-direction: column; }
@ -127,10 +114,6 @@ button:hover { background: #1d4ed8; }
.aw-row { display: flex; justify-content: space-between; padding: 0.08rem 0; } .aw-row { display: flex; justify-content: space-between; padding: 0.08rem 0; }
.aw-key { color: #888; font-size: 0.65rem; } .aw-key { color: #888; font-size: 0.65rem; }
.aw-val { color: #e0e0e0; font-size: 0.7rem; font-weight: 500; } .aw-val { color: #e0e0e0; font-size: 0.7rem; font-weight: 500; }
.aw-exp-conv { color: #4caf50; }
.aw-exp-deleg { color: #ff9800; }
.aw-exp-wait { color: #42a5f5; }
.aw-exp-obs { color: #9e9e9e; }
/* UI Controls (workspace) */ /* UI Controls (workspace) */
.controls-container { padding: 0.3rem 0; display: flex; flex-wrap: wrap; gap: 0.3rem; align-items: flex-start; } .controls-container { padding: 0.3rem 0; display: flex; flex-wrap: wrap; gap: 0.3rem; align-items: flex-start; }
@ -147,51 +130,6 @@ button:hover { background: #1d4ed8; }
.cd-label { color: #888; } .cd-label { color: #888; }
.cd-value { color: #e0e0e0; margin-left: 0.5rem; } .cd-value { color: #e0e0e0; margin-left: 0.5rem; }
/* Workspace cards */
.ws-card { background: #111; border: 1px solid #222; border-radius: 0.4rem; padding: 0.5rem 0.6rem; width: 100%; }
.ws-card-clickable { cursor: pointer; }
.ws-card-clickable:hover { border-color: #2563eb; background: #0a1628; }
.ws-card-title { font-size: 0.85rem; font-weight: 700; color: #e0e0e0; }
.ws-card-subtitle { font-size: 0.7rem; color: #888; margin-top: 0.1rem; }
.ws-card-fields { margin-top: 0.4rem; display: flex; flex-direction: column; gap: 0.15rem; }
.ws-card-field { display: flex; justify-content: space-between; font-size: 0.75rem; padding: 0.1rem 0; }
.ws-card-key { color: #888; }
.ws-card-val { color: #e0e0e0; font-weight: 500; }
.ws-card-link { color: #60a5fa; cursor: pointer; font-weight: 500; }
.ws-card-link:hover { text-decoration: underline; }
.ws-card-actions { margin-top: 0.4rem; display: flex; gap: 0.3rem; flex-wrap: wrap; }
.ws-card-btn { font-size: 0.7rem; padding: 0.2rem 0.5rem; }
.ws-list { display: flex; flex-direction: column; gap: 0.3rem; width: 100%; }
.ws-list-title { font-size: 0.75rem; font-weight: 700; color: #888; text-transform: uppercase; letter-spacing: 0.03em; margin-bottom: 0.2rem; }
.ws-card-nested { margin: 0; border-color: #1a1a2e; }
/* Artifact system */
.artifacts-container { padding: 0.3rem 0; display: flex; flex-direction: column; gap: 0.4rem; }
.ws-artifact { width: 100%; }
.ws-artifact-entity { background: #111; border: 1px solid #222; border-radius: 0.4rem; padding: 0.5rem 0.6rem; }
.ws-artifact-data_table { }
.ws-artifact-action_bar { display: flex; flex-wrap: wrap; gap: 0.3rem; }
.ws-artifact-status { padding: 0.25rem 0.4rem; font-size: 0.75rem; display: flex; align-items: center; gap: 0.4rem; }
.ws-artifact-header { font-size: 0.75rem; font-weight: 600; color: #888; margin-bottom: 0.2rem; }
.ws-artifact-fallback { font-size: 0.7rem; color: #666; font-family: monospace; white-space: pre-wrap; }
/* Document page artifact */
.ws-artifact-document_page { background: #111; border: 1px solid #222; border-radius: 0.4rem; padding: 0.8rem 1rem; }
.ws-doc-title { font-size: 1rem; font-weight: 700; color: #e0e0e0; margin-bottom: 0.6rem; border-bottom: 1px solid #333; padding-bottom: 0.4rem; }
.ws-doc-section { margin-bottom: 0.5rem; }
.ws-doc-heading { font-size: 0.8rem; font-weight: 700; color: #a78bfa; margin-bottom: 0.2rem; }
.ws-doc-content { font-size: 0.75rem; color: #ccc; line-height: 1.5; }
.ws-doc-content ul, .ws-doc-content ol { margin: 0.2rem 0; padding-left: 1.2rem; }
/* Machine artifact */
.ws-artifact-machine { background: #111; border: 1px solid #2563eb33; border-radius: 0.4rem; padding: 0.5rem 0.6rem; }
.ws-machine-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.3rem; }
.ws-machine-name { font-size: 0.75rem; font-weight: 600; color: #a78bfa; }
.ws-machine-state { font-size: 0.7rem; color: #60a5fa; background: #1e3a5f; padding: 0.1rem 0.4rem; border-radius: 0.2rem; }
.ws-machine-content { font-size: 0.75rem; color: #ccc; padding: 0.1rem 0; }
.ws-machine-data { display: flex; flex-wrap: wrap; gap: 0.3rem; margin-top: 0.2rem; }
.ws-machine-datum { font-size: 0.65rem; color: #888; background: #1a1a2e; padding: 0.1rem 0.3rem; border-radius: 0.2rem; }
/* Login overlay */ /* Login overlay */
#login-overlay { position: fixed; inset: 0; background: rgba(0,0,0,0.85); display: flex; align-items: center; justify-content: center; z-index: 1000; } #login-overlay { position: fixed; inset: 0; background: rgba(0,0,0,0.85); display: flex; align-items: center; justify-content: center; z-index: 1000; }
.login-card { background: #1a1a1a; padding: 2rem; border-radius: 0.6rem; text-align: center; } .login-card { background: #1a1a1a; padding: 2rem; border-radius: 0.6rem; text-align: center; }

View File

@ -1,33 +0,0 @@
# Artifact System
Tests that the artifact rendering pipeline works end-to-end.
Expert produces data → UINode converts to artifacts → frontend renders.
## Setup
- clear history
## Steps
### 1. Query produces data_table artifact
- send: show me 3 customers in a table
- expect_trace: has tool_call
- expect_response: length > 10
### 2. Entity detail via card
- send: show me details for customer 1
- expect_trace: has tool_call
- expect_response: length > 10
### 3. Action bar via buttons
- send: create two buttons on my dashboard: Refresh and Export
- expect_actions: length >= 2
- expect_actions: any action contains "refresh" or "Refresh"
### 4. Machine artifact
- send: create a machine called "flow" with initial state "ready" and a state called "done"
- expect_trace: has machine_created
### 5. Query after buttons survive
- send: how many customers are there?
- expect_response: length > 5
- expect_actions: any action contains "refresh" or "Refresh"

View File

@ -1,46 +0,0 @@
# Domain Context
Tests that the expert understands the Eras business domain:
Heizkostenabrechnung, Kunde→Objekt→Nutzeinheit→Geraet hierarchy,
and can formulate correct JOINs without guessing column names.
## Setup
- clear history
## Steps
### 1. Expert knows the hierarchy
- send: wie viele Objekte haben Kunden im Durchschnitt?
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "doesn't exist"
- expect_response: length > 20
### 2. Expert can JOIN kunden and objekte
- send: zeig mir die Top 5 Kunden mit den meisten Objekten
- expect_trace: has tool_call
- expect_response: not contains "Error" or "error" or "Unknown column"
- expect_response: length > 20
### 3. Expert understands Nutzeinheiten belong to Objekte
- send: how many Nutzeinheiten does the system have total?
- expect_trace: has tool_call
- expect_response: not contains "Error" or "error" or "Unknown column"
- expect_response: length > 10
### 4. Expert understands Geraete belong to Nutzeinheiten
- send: which Objekt has the most Geraete?
- expect_trace: has tool_call
- expect_response: not contains "Error" or "error" or "Unknown column"
- expect_response: length > 20
### 5. Multi-hop query through hierarchy
- send: zeig alle Nutzer in Objekten von Kunde mit Jaeger im Namen
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "doesn't exist"
- expect_response: contains "Jaeger" or "jaeger"
### 6. PA formulates good job descriptions
- send: gib mir eine Uebersicht ueber Kunde 2
- expect_trace: has routed
- expect_response: length > 20
- expect_response: not contains "clarify" or "specify" or "what kind"

View File

@ -1,64 +0,0 @@
# Eras Domain Mastery
Tests that the expert knows the schema cold — no DESCRIBE at runtime, no SQL errors,
domain-correct responses. The expert is a Heizkostenabrechnung specialist, not a SQL explorer.
## Setup
- clear history
## Steps
### 1. Customer overview
- send: zeig mir die ersten 5 Kunden
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 20
### 2. Objekte per Kunde (junction table)
- send: welcher Kunde hat die meisten Objekte?
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 20
### 3. Nutzeinheiten in an Objekt
- send: wie viele Nutzeinheiten hat Objekt 4?
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 5
### 4. Geraete count per Objekt
- send: welches Objekt hat die meisten Geraete?
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 20
### 5. Full hierarchy traversal (4 tables)
- send: zeig mir alle Nutzer von Kunde 2
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 10
### 6. Address lookup via junction
- send: was ist die Adresse von Objekt 4?
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 10
### 7. Verbrauchsdaten query
- send: zeig mir die letzten 5 Verbrauchswerte von Geraet 100
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 10
### 8. Domain language response (not SQL dump)
- send: gib mir eine Zusammenfassung von Kunde 103
- expect_trace: has tool_call
- expect_response: not contains "SELECT" or "JOIN" or "FROM"
- expect_response: length > 30
### 9. Expert does NOT describe at runtime
- send: wie viele Geraete hat Kunde 63?
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: not contains "DESCRIBE" or "describe"
- expect_response: length > 5

View File

@ -1,50 +0,0 @@
# Expectation Tracking
Tests that memorizer tracks user_expectation and it influences PA/Output behavior.
Exercises machine features (update_machine, transition_machine) alongside expectation transitions.
## Setup
- clear history
## Steps
### 1. Greeting sets conversational
- send: hi there!
- expect_response: length > 2
- expect_state: user_expectation is "conversational"
### 2. Create a wizard machine
- send: create a machine called "project" with states: planning (initial) and executing
- expect_trace: has machine_created
### 3. Delegate a task
- send: build me a summary report of the top 5 customers by device count
- expect_response: length > 20
- expect_state: user_expectation is "delegated" or "observing"
### 4. Ask about wizard (status check stays in flow)
- send: what state is my project machine in?
- expect_response: contains "planning" or "project"
- expect_state: user_expectation is "conversational" or "delegated"
### 5. Store data on machine
- send: use update_machine to store status=in_progress on the project machine
- expect_response: length > 5
### 6. Transition machine
- send: use transition_machine to move project to executing state
- expect_response: length > 5
### 7. Verify machine state and data
- send: what is the current state and data of the project machine?
- expect_response: contains "executing" or "in_progress"
### 8. Short nudge triggers waiting_input
- send: und?
- expect_response: length > 5
- expect_state: user_expectation is "waiting_input" or "conversational"
### 9. Quick thanks (observing)
- send: ok danke
- expect_response: length > 0
- expect_state: user_expectation is "observing" or "observational" or "conversational"

View File

@ -1,33 +0,0 @@
# Expert Recovery
Tests that the expert recovers from SQL errors by retrying with corrected queries,
not by reporting the error and stopping.
## Setup
- clear history
## Steps
### 1. Expert recovers from column error silently
- send: zeig mir alle Geraete von Objekt 4 mit Bezeichnung und Einbaudatum
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 20
### 2. Multi-table query with potential errors
- send: zeig mir alle Nutzer und ihre Geraete fuer Kunde 2
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 20
### 3. Expert does not give up on first failure
- send: zeig mir Verbrauchswerte fuer Geraet 50 im letzten Monat
- expect_trace: has tool_call
- expect_response: not contains "I need assistance" or "developer" or "schema issue"
- expect_response: length > 10
### 4. Expert retries on unmapped table (abrechnungsinformationen)
- send: zeig mir die letzten 3 Abrechnungsinformationen
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054"
- expect_response: length > 10

View File

@ -1,41 +0,0 @@
# Machine State → PA Context
Tests that PA reads machine state when routing, and experts can write back to machines.
Validates: enriched machine summary, update_machine, transition_machine.
## Setup
- clear history
## Steps
### 1. Create a machine
- send: create a navigation machine called "wizard" with initial state "start" and a second state called "details"
- expect_trace: has machine_created
### 2. PA sees machine in context
- send: what machines are active on my dashboard?
- expect_response: contains "wizard" or "start"
### 3. Expert stores data on machine
- send: use update_machine to store region=Bayern on the wizard machine
- expect_response: contains "Bayern" or "region" or "stored" or "updated"
### 4. PA sees stored data
- send: what data is stored in my wizard machine?
- expect_response: contains "Bayern" or "region"
### 5. Expert transitions machine to details
- send: use transition_machine to move wizard to details state
- expect_response: length > 5
### 6. PA sees updated state
- send: what state is the wizard in now?
- expect_response: contains "details"
### 7. Expert transitions back
- send: use transition_machine to move wizard back to start
- expect_response: length > 5
### 8. Final state check
- send: tell me the current wizard state and stored data
- expect_response: contains "start"

View File

@ -1,19 +0,0 @@
# PA Retry on Expert Failure
Tests that when expert fails, PA reformulates and retries with a different approach.
## Setup
- clear history
## Steps
### 1. Complex analytical query that may need retry
- send: Finde KWZ-Geraete mit verdaechtigen Verbrauchsspruengen - also wo der Verbrauch zwischen zwei Ablesungen stark ansteigt
- expect_response: length > 20
### 2. Verify results contain device data
- expect_response: contains "Gera" or "gera" or "KWZ" or "kwz" or "Verbrauch" or "device"
### 3. Follow up with details
- send: zeig mir die Verbraeuche von einem dieser Geraete
- expect_response: length > 10

View File

@ -1,7 +1,7 @@
{ {
"timestamp": "2026-03-30 00:02:55", "timestamp": "2026-03-29 06:04:47",
"testcases": { "testcases": {
"Artifact System": [ "S3* Audit Corrections": [
{ {
"step": "Setup", "step": "Setup",
"check": "clear", "check": "clear",
@ -9,355 +9,93 @@
"detail": "cleared" "detail": "cleared"
}, },
{ {
"step": "Query produces data_table artifact", "step": "Tool calls produce results (baseline)",
"check": "send: show me 3 customers in a table", "check": "send: create two buttons: Alpha and Beta",
"status": "PASS", "status": "PASS",
"detail": "response: The database contains information for three customers: Kathrin Jager, Leon Schre" "detail": "response: 👍 Okay, I've created buttons labeled \"Alpha\" and \"Beta\".\n"
}, },
{ {
"step": "Query produces data_table artifact", "step": "Tool calls produce results (baseline)",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Query produces data_table artifact",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 138 > 10"
},
{
"step": "Entity detail via card",
"check": "send: show me details for customer 1",
"status": "PASS",
"detail": "response: ```tool_code\nquery_db({\"query\":\"SELECT * FROM customers WHERE customer_id = 1\"})"
},
{
"step": "Entity detail via card",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Entity detail via card",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 84 > 10"
},
{
"step": "Action bar via buttons",
"check": "send: create two buttons on my dashboard: Refr",
"status": "PASS",
"detail": "response: I have added the 'Refresh' and 'Export' buttons to your dashboard. These buttons"
},
{
"step": "Action bar via buttons",
"check": "actions: length >= 2",
"status": "PASS",
"detail": "2 actions >= 2"
},
{
"step": "Action bar via buttons",
"check": "actions: any action contains \"refresh\" or \"Refresh\"",
"status": "PASS",
"detail": "found 'refresh' in actions"
},
{
"step": "Machine artifact",
"check": "send: create a machine called \"flow\" with init",
"status": "PASS",
"detail": "response: OK, I've created a new interactive machine called 'flow' with the initial state "
},
{
"step": "Machine artifact",
"check": "trace: has machine_created",
"status": "PASS",
"detail": "found event 'machine_created'"
},
{
"step": "Query after buttons survive",
"check": "send: how many customers are there?",
"status": "PASS",
"detail": "response: There are 693 customers in the database.\n"
},
{
"step": "Query after buttons survive",
"check": "response: length > 5",
"status": "PASS",
"detail": "length 41 > 5"
},
{
"step": "Query after buttons survive",
"check": "actions: any action contains \"refresh\" or \"Refresh\"",
"status": "PASS",
"detail": "found 'refresh' in actions"
}
],
"Fast v4": [
{
"step": "Setup",
"check": "clear",
"status": "PASS",
"detail": "cleared"
},
{
"step": "Reflex",
"check": "send: hi!",
"status": "PASS",
"detail": "response: Hey Nico! 👋 How can I help you today?\n"
},
{
"step": "Reflex",
"check": "response: length > 2",
"status": "PASS",
"detail": "length 38 > 2"
},
{
"step": "PA routes to expert",
"check": "send: show me 3 customers",
"status": "PASS",
"detail": "response: Alright, I've fetched 3 customer records for you. You can see the ID, Name detai"
},
{
"step": "PA routes to expert",
"check": "trace: has routed",
"status": "PASS",
"detail": "found event 'routed'"
},
{
"step": "PA routes to expert",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "PA routes to expert",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 181 > 10"
},
{
"step": "German query",
"check": "send: Zeig mir alle Tabellen in der Datenbank",
"status": "PASS",
"detail": "response: Okay, ich habe eine Liste aller Tabellen in der \"eras2_production\" Datenbank abg"
},
{
"step": "German query",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "German query",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 303 > 10"
},
{
"step": "Schema discovery",
"check": "send: describe the kunden table",
"status": "PASS",
"detail": "response: The `kunden` table stores customer information, including names, customer number"
},
{
"step": "Schema discovery",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Schema discovery",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 391 > 10"
},
{
"step": "Count query (cached schema)",
"check": "send: how many customers are there?",
"status": "PASS",
"detail": "response: There are 693 customers in the database.\n"
},
{
"step": "Count query (cached schema)",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Count query (cached schema)",
"check": "response: length > 5",
"status": "PASS",
"detail": "length 41 > 5"
},
{
"step": "Complex query",
"check": "send: which customers have the most devices?",
"status": "PASS",
"detail": "response: The query results list the top 10 customers with the most devices. Anne Bürger h"
},
{
"step": "Complex query",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Complex query",
"check": "response: length > 20",
"status": "PASS",
"detail": "length 166 > 20"
},
{
"step": "Error recovery",
"check": "send: SELECT * FROM nichtexistiert",
"status": "PASS",
"detail": "response: Oops! It seems like the table `nichtexistiert` doesn't exist in the database. 🤔 "
},
{
"step": "Error recovery",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Error recovery",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 396 > 10"
},
{
"step": "Memorizer",
"check": "send: my name is Nico",
"status": "PASS",
"detail": "response: Thanks, Nico! I'll remember that. Do you have any other questions? 😊\n"
},
{
"step": "Memorizer",
"check": "state: facts any contains \"Nico\"",
"status": "PASS",
"detail": "found 'Nico' in facts"
},
{
"step": "Language switch",
"check": "send: Hallo wie gehts?",
"status": "PASS",
"detail": "response: Mir geht es gut, danke der Nachfrage, Nico! Und selbst? Gibt es etwas, bei dem i"
},
{
"step": "Language switch",
"check": "state: language is \"de\" or \"mixed\"",
"status": "PASS",
"detail": "language=mixed"
},
{
"step": "Bye",
"check": "send: ok bye",
"status": "PASS",
"detail": "response: Bye Nico! 👋 If you need anything else, just let me know. 😊\n"
},
{
"step": "Bye",
"check": "response: length > 2",
"status": "PASS",
"detail": "length 59 > 2"
}
],
"Dashboard Integration": [
{
"step": "Setup",
"check": "clear",
"status": "PASS",
"detail": "cleared"
},
{
"step": "Expert creates buttons",
"check": "send: create two buttons on my dashboard: Repo",
"status": "PASS",
"detail": "response: I have added 'Report' and 'Export' buttons to your dashboard.\n\n(UI buttons shown"
},
{
"step": "Expert creates buttons",
"check": "actions: length >= 2",
"status": "PASS",
"detail": "3 actions >= 2"
},
{
"step": "Expert creates buttons",
"check": "actions: any action contains \"report\" or \"Report\"",
"status": "PASS",
"detail": "found 'report' in actions"
},
{
"step": "Buttons survive a query",
"check": "send: how many customers are there?",
"status": "PASS",
"detail": "response: I'm running a query to count all customer IDs. One moment...\n"
},
{
"step": "Buttons survive a query",
"check": "response: length > 5",
"status": "PASS",
"detail": "length 61 > 5"
},
{
"step": "Buttons survive a query",
"check": "actions: any action contains \"report\" or \"Report\"",
"status": "PASS",
"detail": "found 'report' in actions"
},
{
"step": "Expert creates a machine",
"check": "send: create a navigation machine called \"work",
"status": "PASS",
"detail": "response: I've created the 'workflow' machine with 'start' and 'step2' states. The 'start'"
},
{
"step": "Expert creates a machine",
"check": "trace: has tool_call create_machine",
"status": "PASS",
"detail": "found create_machine via machine_created event"
},
{
"step": "Expert shows data table",
"check": "send: show me 5 customers in a table",
"status": "PASS",
"detail": "response: Here are five customer entries with their IDs, names, object count, and status:\n"
},
{
"step": "Expert shows data table",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Expert shows data table",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 118 > 10"
},
{
"step": "Expert replaces buttons",
"check": "send: remove all buttons and create one button",
"status": "PASS",
"detail": "response: I have removed the existing 'Report' and 'Export' buttons from the dashboard and"
},
{
"step": "Expert replaces buttons",
"check": "actions: length >= 1", "check": "actions: length >= 1",
"status": "PASS", "status": "PASS",
"detail": "2 actions >= 1" "detail": "2 actions >= 1"
}, },
{ {
"step": "Expert replaces buttons", "step": "Tool calls produce results (baseline)",
"check": "actions: any action contains \"reset\" or \"Reset\"", "check": "actions: any action contains \"alpha\" or \"Alpha\"",
"status": "PASS", "status": "PASS",
"detail": "found 'reset' in actions" "detail": "found 'alpha' in actions"
},
{
"step": "Dashboard mismatch triggers re-emit",
"check": "send: I see nothing on my dashboard, fix it",
"status": "PASS",
"detail": "response: 👍 Done — Alpha and Beta buttons are now live on your dashboard. They should appe"
},
{
"step": "Dashboard mismatch triggers re-emit",
"check": "response: not contains \"sorry\" or \"apologize\"",
"status": "PASS",
"detail": "none of ['sorry', 'apologize'] found (as expected)"
},
{
"step": "Dashboard mismatch triggers re-emit",
"check": "actions: length >= 1",
"status": "PASS",
"detail": "2 actions >= 1"
},
{
"step": "DB error triggers retry with corrected SQL",
"check": "send: SELECT * FROM NichtExistent LIMIT 5",
"status": "PASS",
"detail": "response: Ah, it seems like the table `NichtExistent` does not exist. Double-check the tab"
},
{
"step": "DB error triggers retry with corrected SQL",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "DB error triggers retry with corrected SQL",
"check": "response: not contains \"1146\"",
"status": "PASS",
"detail": "none of ['1146'] found (as expected)"
},
{
"step": "DB error triggers retry with corrected SQL",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 163 > 10"
},
{
"step": "Complex request gets Director plan",
"check": "send: investigate which customers have the mos",
"status": "PASS",
"detail": "response: Okay, I'll look into which customers have the most devices. This might take a mo"
},
{
"step": "Complex request gets Director plan",
"check": "trace: has director_plan",
"status": "FAIL",
"detail": "no 'director_plan' event in trace"
},
{
"step": "Complex request gets Director plan",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Complex request gets Director plan",
"check": "response: length > 20",
"status": "PASS",
"detail": "length 86 > 20"
} }
] ]
}, },
"summary": { "summary": {
"passed": 58, "passed": 14,
"failed": 0 "failed": 1
} }
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,40 +0,0 @@
# Workspace Components
Tests that the expert emits structured UI components (cards, lists, tables)
instead of dumping text or raw SQL. The workspace should show domain-aware displays.
## Setup
- clear history
## Steps
### 1. Detail card for a single entity
- send: zeig mir Details zu Kunde 2
- expect_trace: has tool_call
- expect_actions: has card
- expect_response: not contains "SELECT" or "JOIN"
- expect_response: length > 10
### 2. List of items with navigation
- send: zeig mir alle Objekte von Kunde 2
- expect_trace: has tool_call
- expect_actions: has card or has table
- expect_response: length > 10
### 3. Table for tabular data
- send: zeig mir die Geraete von Objekt 4
- expect_trace: has tool_call
- expect_actions: has table
- expect_response: length > 10
### 4. Card with actions (drill-down buttons)
- send: zeig mir Auftrag 21479
- expect_trace: has tool_call
- expect_actions: length >= 1
- expect_response: length > 10
### 5. Summary card with key metrics
- send: gib mir eine Zusammenfassung von Objekt 4
- expect_trace: has tool_call
- expect_actions: has card
- expect_response: length > 20