Compare commits

..

No commits in common. "925fff731f1b6b618c7573551f9526b933231903" and "3a9c2795cfa7556203c49dedaaeadca8a2271c06" have entirely different histories.

35 changed files with 342 additions and 5311 deletions

View File

@ -153,29 +153,12 @@ def register_routes(app):
msg = json.loads(data)
# Always use current runtime (may change after graph switch)
rt = _active_runtime or runtime
try:
if msg.get("type") == "action":
action = msg.get("action", "unknown")
data_payload = msg.get("data")
if hasattr(rt, 'use_frames') and rt.use_frames:
# Frame engine handles actions as ACTION: prefix messages
action_text = f"ACTION:{action}"
if data_payload:
action_text += f"|data:{json.dumps(data_payload)}"
await rt.handle_message(action_text)
else:
await rt.handle_action(action, data_payload)
elif msg.get("type") == "cancel_process":
rt.process_manager.cancel(msg.get("pid", 0))
else:
await rt.handle_message(msg.get("text", ""), dashboard=msg.get("dashboard"))
except Exception as e:
import traceback
log.error(f"[ws] handler error: {e}\n{traceback.format_exc()}")
try:
await ws.send_text(json.dumps({"type": "hud", "node": "runtime", "event": "error", "detail": str(e)[:200]}))
except Exception:
pass
if msg.get("type") == "action":
await rt.handle_action(msg.get("action", "unknown"), msg.get("data"))
elif msg.get("type") == "cancel_process":
rt.process_manager.cancel(msg.get("pid", 0))
else:
await rt.handle_message(msg.get("text", ""), dashboard=msg.get("dashboard"))
except WebSocketDisconnect:
if _active_runtime:
_active_runtime.detach_ws()
@ -364,7 +347,6 @@ def register_routes(app):
"language": "en",
"style_hint": "casual, technical",
"facts": [],
"user_expectation": "conversational",
}
_pipeline_result = {"status": "idle", "id": "", "stage": "cleared"}
# Notify frontend via WS
@ -398,26 +380,11 @@ def register_routes(app):
from .engine import load_graph, get_graph_for_cytoscape
from .runtime import _active_graph_name
graph = load_graph(_active_graph_name)
# Include model info from instantiated nodes if runtime exists
node_details = {}
if _active_runtime:
for role, impl_name in graph["nodes"].items():
# Find the node instance by role
node_inst = getattr(_active_runtime, 'frame_engine', None)
if node_inst and hasattr(node_inst, 'nodes'):
inst = node_inst.nodes.get(role)
if inst:
node_details[role] = {
"impl": impl_name,
"model": getattr(inst, 'model', None) or '',
"max_tokens": getattr(inst, 'max_context_tokens', 0),
}
return {
"name": graph["name"],
"description": graph["description"],
"nodes": graph["nodes"],
"edges": graph["edges"],
"node_details": node_details,
"cytoscape": get_graph_for_cytoscape(graph),
}

View File

@ -302,59 +302,9 @@ class FrameEngine:
expert.send_hud = original_hud
thought_summary = (f"response[{len(thought.response)}] tool={thought.tool_used or 'none'} "
f"actions={len(thought.actions)} errors={len(thought.errors)}")
f"actions={len(thought.actions)}")
has_tool = bool(thought.tool_used and thought.tool_output)
# PA retry: if expert failed OR skipped tools when data was needed
expectation = self.memorizer.state.get("user_expectation", "conversational")
# Detect hallucination: expert returned no tool output for a data job
job_needs_data = any(k in (routing.job or "").lower()
for k in ["query", "select", "tabelle", "table", "daten", "data",
"cost", "kosten", "count", "anzahl", "average", "schnitt",
"find", "finde", "show", "zeig", "list", "beschreib"])
expert_skipped_tools = not has_tool and not thought.errors and job_needs_data
if (thought.errors or expert_skipped_tools) and not has_tool and expectation in ("delegated", "waiting_input", "conversational"):
retry_reason = f"{len(thought.errors)} errors" if thought.errors else "no tool calls for data job"
self._end_frame(rec, output_summary=thought_summary,
route="pa_retry", condition=f"expert_failed ({retry_reason}), expectation={expectation}")
await self._send_hud({"node": "runtime", "event": "pa_retry",
"detail": f"expert failed: {retry_reason}, retrying via PA"})
# Stream retry notice to user
retry_msg = "Anderer Ansatz..." if routing.language == "de" else "Trying a different approach..."
await self.sink.send_delta(retry_msg + "\n")
# PA reformulates with error context
retry_errors = thought.errors if thought.errors else [
{"query": "(none)", "error": "Expert produced no database queries. The job requires data lookup but the expert answered without querying. Reformulate with explicit query instructions."}
]
error_summary = "; ".join(e.get("error", "")[:80] for e in retry_errors[-2:])
rec = self._begin_frame(self.frame + 1, "pa_retry",
input_summary=f"errors: {error_summary[:100]}")
routing2 = await self.nodes["pa"].route_retry(
command, self.history, memory_context=mem_ctx,
identity=self.identity, channel=self.channel,
original_job=routing.job, errors=retry_errors)
self._end_frame(rec, output_summary=f"retry_job: {(routing2.job or '')[:60]}",
route=f"expert_{routing2.expert}" if routing2.expert != "none" else "output")
if routing2.expert != "none":
expert2 = self._experts.get(routing2.expert, expert)
rec = self._begin_frame(self.frame + 1, f"expert_{routing2.expert}_retry",
input_summary=f"retry job: {(routing2.job or '')[:80]}")
original_hud2 = expert2.send_hud
expert2.send_hud = self._make_progress_wrapper(original_hud2, routing2.language)
try:
thought = await expert2.execute(routing2.job, routing2.language)
finally:
expert2.send_hud = original_hud2
thought_summary = (f"response[{len(thought.response)}] tool={thought.tool_used or 'none'} "
f"errors={len(thought.errors)}")
has_tool = bool(thought.tool_used and thought.tool_output)
self._end_frame(rec, output_summary=thought_summary,
route="interpreter" if has_tool else "output+ui")
routing = routing2 # use retry routing for rest of pipeline
# Interpreter (conditional)
if self.has_interpreter and has_tool:
self._end_frame(rec, output_summary=thought_summary,
@ -573,7 +523,7 @@ class FrameEngine:
return self._make_result(result)
# Complex action — needs full pipeline
self._end_frame(rec, output_summary="no local handler", route="pa/director/thinker")
self._end_frame(rec, output_summary="no local handler", route="director/thinker")
action_desc = f"ACTION: {action}"
if data:
@ -585,9 +535,7 @@ class FrameEngine:
analysis=InputAnalysis(intent="action", topic=action, complexity="simple"),
source_text=action_desc)
if self.has_pa:
return await self._run_expert_pipeline(command, mem_ctx, dashboard)
elif self.has_director:
if self.has_director:
return await self._run_director_pipeline(command, mem_ctx, dashboard)
else:
return await self._run_thinker_pipeline(command, mem_ctx, dashboard)
@ -657,10 +605,6 @@ class FrameEngine:
response, controls = await asyncio.gather(output_task, ui_task)
if controls:
await self.sink.send_controls(controls)
# Send artifacts (new system) alongside controls
artifacts = self.ui_node.get_artifacts()
if artifacts:
await self.sink.send_artifacts(artifacts)
return response
def _check_condition(self, name: str, command: Command = None,
@ -678,7 +622,6 @@ class FrameEngine:
return {
"response": response,
"controls": self.ui_node.current_controls,
"artifacts": self.ui_node.get_artifacts(),
"memorizer": self.memorizer.state,
"frames": self.frame,
"trace": self.last_trace.to_dict(),

View File

@ -18,9 +18,6 @@ class Node:
self.context_fill_pct = 0
async def hud(self, event: str, **data):
# Always include model on context events so frontend knows what model each node uses
if event == "context" and self.model:
data["model"] = self.model
await self.send_hud({"node": self.name, "event": event, **data})
def trim_context(self, messages: list[dict]) -> list[dict]:

View File

@ -1,8 +1,4 @@
"""Eras Expert: Heizkostenabrechnung domain specialist.
The expert knows the full database schema. No DESCRIBE at runtime.
All queries use verified column names and JOIN patterns.
"""
"""Eras Expert: heating/energy customer database specialist."""
import asyncio
import logging
@ -17,209 +13,55 @@ class ErasExpertNode(ExpertNode):
name = "eras_expert"
default_database = "eras2_production"
DOMAIN_SYSTEM = """You are the Eras domain expert for Heizkostenabrechnung (German heating cost billing).
DOMAIN_SYSTEM = """You are the Eras expert — specialist for heating and energy customer data.
You work with the eras2_production database containing customer, device, and billing data.
All table and column names are German (lowercase). Common queries involve customer lookups,
device counts, consumption analysis, and billing reports."""
BUSINESS CONTEXT:
Eras is software for Hausverwaltungen and Messdienste who manage properties, meters, and billings.
The USER of this agent is an Eras customer exploring their data. They think in domain terms
(Kunden, Objekte, Wohnungen, Zaehler) NOT in SQL. Never expose SQL or table names to the user.
SCHEMA = """Known tables (eras2_production):
- kunden customers
- objekte properties/objects linked to customers
- nutzeinheit usage units within objects
- geraete devices/meters
- geraeteverbraeuche device consumption readings
- abrechnungen billing records
DOMAIN MODEL:
- Kunden = property managers (Hausverwaltungen). 693 in the system.
- Objekte = buildings/Liegenschaften managed by Kunden. 780 total. Linked via objektkunde (m:n).
- Nutzeinheiten = apartments/units inside Objekte. 4578 total.
- Nutzer = tenants/occupants of Nutzeinheiten. 8206 total.
- Geraete = measurement devices (Heizkostenverteiler, Zaehler). 56726 total.
- Verbraeuche = consumption readings from Geraete. 1.3M readings.
- Adressen = postal addresses, linked via objektadressen/kundenadressen.
CRITICAL: You do NOT know the exact column names. They are German and unpredictable.
Your FIRST tool_sequence step for ANY SELECT query MUST be DESCRIBE on the target table.
Then use the actual column names from the DESCRIBE result in your SELECT.
RESPOND IN DOMAIN LANGUAGE:
- Say "Kunde Jaeger hat 3 Objekte" not "SELECT COUNT..."
- Say "12 Wohnungen mit 45 Geraeten" not "nutzeinheit rows"
- Present data as summaries, not raw tables"""
Example tool_sequence for "show me 5 customers":
[
{{"tool": "query_db", "args": {{"query": "DESCRIBE kunden", "database": "eras2_production"}}}},
{{"tool": "query_db", "args": {{"query": "SELECT * FROM kunden LIMIT 5", "database": "eras2_production"}}}}
]"""
SCHEMA = """COMPLETE DATABASE SCHEMA (eras2_production) — use these exact column names:
def __init__(self, send_hud, process_manager=None):
super().__init__(send_hud, process_manager)
self._schema_cache: dict[str, str] = {} # table_name -> DESCRIBE result
=== kunden (693 rows) ===
PK: ID (int)
Name1, Name2, Name3 (longtext) customer name parts
Kundennummer (longtext) customer number
AnredeID (FK), BriefanredeID (FK), ZugeordneterKomplettdruckID (FK)
Anmerkung, Fremdnummer, Ansprechpartner (longtext)
Steuernummer, UmsatzsteuerID (longtext)
HatHistorie, IstWebkunde, IstNettoKunde, BrennstoffkostenNachFIFO, BelegePerEmail (bool)
MietpreisAnpassungProzent (decimal)
async def execute(self, job: str, language: str = "de"):
"""Execute with schema auto-discovery. Caches DESCRIBE results."""
# Inject cached schema into the job context
if self._schema_cache:
schema_ctx = "Known column names from previous DESCRIBE:\n"
for table, desc in self._schema_cache.items():
# Just first 5 lines to keep it compact
lines = desc.strip().split("\n")[:6]
schema_ctx += f"\n{table}:\n" + "\n".join(lines) + "\n"
job = job + "\n\n" + schema_ctx
=== objektkunde (911 rows) JUNCTION: kunden objekte (many-to-many) ===
PK: ID (int)
KundeID (FK kunden.ID)
ObjektID (FK objekte.ID)
ZeitraumVon, ZeitraumBis (datetime)
IstKunde, IstEigentuemer, IstRechnungsempfaenger, IstAbrechnungsempfaenger (bool)
result = await super().execute(job, language)
=== objekte (780 rows) ===
PK: ID (int)
Objektnummer (longtext) building reference number
AbleserID, MonteurID, UVIRefObjektID, ZugeordneterKomplettdruckID (FK)
Anmerkung, AnmerkungIntern (longtext)
HatHistorie, VorauszahlungGetrennt, Selbstablesung, IstObjektFreigegeben (bool)
# Cache any DESCRIBE results from this execution
# Parse from tool_output if it looks like a DESCRIBE result
if result.tool_output and "Field\t" in result.tool_output:
# Try to identify which table was described
for table in ["kunden", "objekte", "nutzeinheit", "geraete",
"geraeteverbraeuche", "abrechnungen"]:
if table in job.lower() or table in result.tool_output.lower():
self._schema_cache[table] = result.tool_output
log.info(f"[eras] cached schema for {table}")
break
=== objektadressen JUNCTION: objekte adressen ===
PK: ID, ObjektID (FK objekte.ID), AdresseID (FK adressen.ID), IstPrimaer (bool)
=== kundenadressen JUNCTION: kunden adressen ===
PK: ID, KundeID (FK kunden.ID), AdresseID (FK adressen.ID), TypDerAdresseID (FK)
=== adressen (1762 rows) ===
PK: ID (int)
Strasse, Hausnummer, Postleitzahl, Ort, Adresszusatz, Postfach (longtext)
LandID (FK), Laengengrad, Breitengrad (double)
=== nutzeinheit (4578 rows) ===
PK: ID (int)
ObjektID (FK objekte.ID)
NeNummerInt (longtext) unit number
Lage, Stockwerk, Flaeche, Nutzflaeche (various)
AdresseID (FK), CustomStatusKeyID (FK)
=== kundenutzeinheit JUNCTION: kunden nutzeinheit ===
PK: ID, KundeID (FK kunden.ID), NutzeinheitID (FK nutzeinheit.ID), Von, Bis (datetime)
=== nutzer (8206 rows) tenants/occupants ===
PK: ID (int)
NutzeinheitID (FK nutzeinheit.ID)
Name1, Name2, Name3, Name4 (longtext) tenant name
NutzungVon, NutzungBis (datetime)
ArtDerNutzung (int), AnredeID (FK), BriefanredeID (FK)
IstGesperrt, Selbstableser (bool)
=== geraete (56726 rows) meters/devices ===
PK: ID (int)
NutzeinheitID (FK nutzeinheit.ID)
Geraetenummer (longtext) device number/serial
Bezeichnung (longtext) device name/label
Beschreibung (longtext) description
ArtikelID (FK), NutzergruppenID (FK), Einheit (int)
Einbaudatum, Ausbaudatum, GeeichtBis, GeeichtAm, ErstInbetriebnahme, DefektAb (datetime)
FirmwareVersion, LaufendeNummer, GruppenKennung, Memo, AllgemeinesMemo (longtext)
AnsprechpartnerID, ZugeordneterRaumID, CustomStatusKeyID (FK)
Gemietet, Gewartet, KeinAndruck, IstAbzuziehendesGeraet, HatHistorie (bool)
=== geraeteverbraeuche (1.3M rows) consumption readings ===
PK: ID (int)
GeraetID (FK geraete.ID)
Ablesedatum (datetime) reading date
Ablesung (double) meter reading value
Verbrauch (double) consumption value
Faktor (double) factor
Aenderungsdatum (datetime)
AbleseartID (FK), Schaetzung (int), Status (int)
IstRekonstruiert (bool), Herkunft (int)
ManuellerWert (double), Rohablesung (double)
Anmerkung, Fehler, Ampullenfarbe (longtext)
=== auftraege (2960 rows) billing work orders ===
PK: ID (int)
AuftragNummer, Bezeichnung (longtext)
ErstellDatum, Abgeschlossen (datetime)
ZugeordneteAbrechnungsinformationID (FK abrechnungsinformationen.ID)
ErstellMitarbeiterID (FK), AuftragsTyp (int), Status (int)
Anmerkung, ObererText, UntererText (longtext)
=== auftragspositionen (5094 rows) line items per work order ===
PK: ID (int)
AuftragID (FK auftraege.ID)
ArtikelID (FK artikel.ID)
SollMenge, IstMenge (int)
ZugeordneterGeraeteArtikelID (FK), ZugeordneteVertragPositionID (FK)
=== artikelposition (70164 rows) billing line items with prices ===
PK: ID (int)
ZugewiesenerArtikelID (FK artikel.ID)
ZugewieseneAbrechnungID (FK abrechnungsinformationen.ID)
RechnungID (FK rechnung.ID)
MengeVorgabe, Menge (decimal), NettoVorgabe, Netto (decimal), MWST (decimal)
Rechnungsart (int), VorschussBerechnung (bool), ARechnung (bool)
VerstecktInNebenkostenID (FK), ZugeordneteVertragPositionID (FK)
=== artikel (1078 rows) service/product catalog ===
PK: ID (int)
Artikelnummer, Bezeichnung (longtext)
Netto (decimal), MWST (decimal)
BerechnungsZiel (int), UmlageIn (int)
ZugeordnetePreislisteID (FK)
IstStandard, ARechnung, AppZusatz, IstEigenKostenpos (bool)
=== rechnung (7356 rows) invoices ===
PK: ID (int)
Rechnungsnummer (longtext), Rechnungsart (int)
BezahltAm (datetime), BezahlterBetrag (decimal)
Druckdatum, Erstelldatum, Exportdatum (datetime)
AbrechnungsinformationID (FK abrechnungsinformationen.ID)
AbschlagSummeSonder, AbschlagSummeStandard (decimal)
Bankeinzug (bool)
=== abrechnungsinformationen (4261 rows) billing periods/settings ===
PK: ID (int)
Von, Bis (datetime) billing period
AbrechnungHeizung, AbrechnungWarmwasser, AbrechnungNebenkosten, AbrechnungKaltwasser (bool)
Tarifabrechnung, BHKW, HeizsaldoInNebenkosten, AbrechnungLegionellen, AbrechnungRauchmelder (bool)
=== nebenkosten (42209 rows) ancillary cost items ===
PK: ID (int)
Von, Bis (datetime)
Bezeichnung (longtext), Mwst (decimal), Brutto (decimal)
EinheitDerKostenart (longtext), Umlage (int), UmlageZiel (int)
ZugeordnetesObjektID (FK objekte.ID)
NurEigentuemer, NurNutzer (bool)
=== vorauszahlungen (83932 rows) advance payments per tenant ===
PK: ID (int)
ZugeordneterNutzerID (FK nutzer.ID)
BetragNebenkosten, BetragHeizkosten, BetragWarmwasser (decimal)
Von, Bis (datetime), IstNetto (bool)
=== heizbetriebskosten (22557 rows) heating operation costs ===
PK: ID (int)
Von, Bis (datetime), Bezeichnung (longtext)
Mwst (decimal), Brutto (decimal), Art (int)
ZugeordnetesObjektID (FK objekte.ID)
ZugeordneteVerbrauchsgruppeID (FK)
=== brennstofflieferungen (6477 rows) fuel deliveries ===
PK: ID (int)
GeliefertAm (datetime), Menge (decimal), Betrag (decimal)
Mwst (decimal), Heizwert (decimal)
Anfangsstand, Endstand (decimal)
ZugeordneterEnergieVerwerterID (FK), BrennstoffMediumID (FK)
ZugeordneteAbrechnungsinformationID (FK abrechnungsinformationen.ID)
=== vertragpositionen (4395 rows) contract line items ===
PK: ID (int)
LaufzeitVon, LaufzeitBis (datetime)
Menge (decimal), Gesamtpreis (decimal), PreisProEinheit (decimal), Mwst (decimal)
ArtikelID (FK artikel.ID), VertragNummer (longtext)
Art (int), Umlage (int)
JOIN PATTERNS (use exactly):
Kunde Objekte: JOIN objektkunde ok ON ok.KundeID = k.ID JOIN objekte o ON o.ID = ok.ObjektID
Objekt Adresse: JOIN objektadressen oa ON oa.ObjektID = o.ID JOIN adressen a ON a.ID = oa.AdresseID
Kunde Adresse: JOIN kundenadressen ka ON ka.KundeID = k.ID JOIN adressen a ON a.ID = ka.AdresseID
Objekt NE: JOIN nutzeinheit ne ON ne.ObjektID = o.ID
NE Nutzer: JOIN nutzer nu ON nu.NutzeinheitID = ne.ID
NE Geraete: JOIN geraete g ON g.NutzeinheitID = ne.ID
Geraet Verbrauch: JOIN geraeteverbraeuche gv ON gv.GeraetID = g.ID
Auftrag Positionen: JOIN auftragspositionen ap ON ap.AuftragID = a.ID
Auftrag Abrechnung: JOIN abrechnungsinformationen ai ON ai.ID = a.ZugeordneteAbrechnungsinformationID
Artikelpos Artikel: JOIN artikel art ON art.ID = ap.ZugewiesenerArtikelID
Artikelpos Rechnung: JOIN rechnung r ON r.ID = ap.RechnungID
Artikelpos Abrechnung: JOIN abrechnungsinformationen ai ON ai.ID = ap.ZugewieseneAbrechnungID
Nebenkosten Objekt: JOIN objekte o ON o.ID = nk.ZugeordnetesObjektID
Vorauszahlung Nutzer: JOIN nutzer nu ON nu.ID = vz.ZugeordneterNutzerID
RULES:
- For tables listed above: use ONLY the listed column names. Never guess.
- For tables NOT listed above: use SELECT * with LIMIT to discover columns.
- If a query fails, the retry system will show you the error. Fix the column name and try again.
- Always LIMIT large queries (max 50 rows).
- Use LEFT JOIN when results might be empty."""
return result

View File

@ -38,38 +38,28 @@ Given a job description, produce a JSON tool sequence to accomplish it.
Available tools:
- query_db(query, database) SQL SELECT/DESCRIBE/SHOW only
- emit_actions(actions) show buttons [{label, action, payload?}]
- emit_actions(actions) show buttons [{{label, action, payload?}}]
- set_state(key, value) persistent key-value
- create_machine(id, initial, states) interactive UI navigation
- add_state / reset_machine / destroy_machine machine lifecycle
- update_machine(id, data) update wizard data fields (e.g. {"bundesland": "Bayern"})
- transition_machine(id, target) move machine to a specific state
- emit_artifact(type, data, actions?, meta?) emit a typed workspace artifact:
type="entity_detail": data={title, subtitle?, fields:[{label,value}]}, actions=[{label,action}]
type="data_table": data={title?, columns:[str], rows:[{col:val}]}
type="document_page": data={title, sections:[{heading,content}]}
type="action_bar": actions=[{label, action, payload?}]
type="status": data={label, value?, display_type:"progress"|"info"|"text"}
PREFERRED: Use emit_artifact for all display output. Legacy emit_card/emit_display still work but emit_artifact is cleaner.
Cards are also generated automatically in the response step from query results.
- emit_display(items) formatted data [{{type, label, value?, style?}}]
- create_machine(id, initial, states) interactive UI with navigation
states: {{"state_name": {{"actions": [...], "display": [...]}}}}
- add_state(id, state, buttons, content) add state to machine
- reset_machine(id) reset to initial
- destroy_machine(id) remove machine
Output ONLY valid JSON:
{
{{
"tool_sequence": [
{"tool": "query_db", "args": {"query": "SELECT ...", "database": "{database}"}}
{{"tool": "query_db", "args": {{"query": "SELECT ...", "database": "{database}"}}}},
{{"tool": "emit_actions", "args": {{"actions": [{{"label": "...", "action": "..."}}]}}}}
],
"response_hint": "How to phrase the result"
}
"response_hint": "How to phrase the result for the user"
}}
Rules:
- NEVER guess column names. Use ONLY columns from the schema.
- NEVER guess column names. If unsure, DESCRIBE first.
- Max 5 tools. Keep it focused.
- For entity details: query all relevant fields, the response step creates the card.
- For lists: query multiple rows, the table renders automatically.
- The job is self-contained.
- NEVER answer data questions without querying the database. You MUST include at least one query_db call for any job that asks about data, counts, costs, or entities. If you are unsure which tables to use, start with DESCRIBE or SELECT * FROM table LIMIT 3 to explore.
- An EMPTY tool_sequence is ONLY acceptable if the job explicitly asks for a UI-only action (buttons, machine, display) with no data lookup."""
- The job is self-contained all context you need is in the job description."""
RESPONSE_SYSTEM = """You are a domain expert summarizing results for the user.
@ -78,203 +68,85 @@ Rules:
Job: {job}
{results}
Output a JSON object with "text" (response to user) and optionally "card" (structured display):
{
"text": "Concise natural response, 1-3 sentences. Reference data. Match language: {language}.",
"card": {
"title": "Entity Name or ID",
"subtitle": "Type or category",
"fields": [{"label": "Field", "value": "actual value from results"}],
"actions": [{"label": "Next action", "action": "action_id"}]
}
}
Rules:
- "text" is REQUIRED. Keep it short.
- "card" is OPTIONAL. Include it for single-entity details (Kunde, Objekt, Auftrag).
- Card fields must use ACTUAL values from the query results, never templates/placeholders.
- For lists of multiple entities, use multiple fields or skip the card.
- If no card makes sense, just return {"text": "..."}.
- Output ONLY valid JSON."""
Write a concise, natural response. 1-3 sentences.
- Reference specific data from the results.
- Don't repeat raw output — summarize.
- Match the language: {language}."""
def __init__(self, send_hud, process_manager=None):
super().__init__(send_hud)
MAX_RETRIES = 3
async def execute(self, job: str, language: str = "de") -> ThoughtResult:
"""Execute a self-contained job with retry on SQL errors.
Expert knows the schema plan, execute, retry if needed, respond."""
"""Execute a self-contained job. Returns ThoughtResult."""
await self.hud("thinking", detail=f"planning: {job[:80]}")
errors_so_far = []
tool_sequence = []
response_hint = ""
# Step 1: Plan tool sequence
plan_messages = [
{"role": "system", "content": self.PLAN_SYSTEM.format(
domain=self.DOMAIN_SYSTEM, schema=self.SCHEMA,
database=self.default_database)},
{"role": "user", "content": f"Job: {job}"},
]
plan_raw = await llm_call(self.model, plan_messages)
tool_sequence, response_hint = self._parse_plan(plan_raw)
for attempt in range(1, self.MAX_RETRIES + 1):
# Plan (or re-plan with error context)
plan_prompt = f"Job: {job}"
if errors_so_far:
plan_prompt += "\n\nPREVIOUS ATTEMPTS FAILED:\n"
for err in errors_so_far:
plan_prompt += f"- Query: {err['query']}\n Error: {err['error']}\n"
if 'describe' in err:
plan_prompt += f" DESCRIBE result: {err['describe'][:300]}\n"
plan_prompt += "\nFix the query. If a column was unknown, use the DESCRIBE result above or try SELECT * LIMIT 3 to see actual columns."
await self.hud("planned", tools=len(tool_sequence), hint=response_hint[:80])
plan_system = self.PLAN_SYSTEM
plan_system = plan_system.replace("{domain}", self.DOMAIN_SYSTEM)
plan_system = plan_system.replace("{schema}", self.SCHEMA)
plan_system = plan_system.replace("{database}", self.default_database)
plan_messages = [
{"role": "system", "content": plan_system},
{"role": "user", "content": plan_prompt},
]
plan_raw = await llm_call(self.model, plan_messages)
tool_sequence, response_hint = self._parse_plan(plan_raw)
await self.hud("planned", tools=len(tool_sequence),
hint=response_hint[:80], attempt=attempt)
# Step 2: Execute tools
actions = []
state_updates = {}
display_items = []
machine_ops = []
tool_used = ""
tool_output = ""
# Execute tools
actions = []
state_updates = {}
display_items = []
machine_ops = []
artifacts = []
tool_used = ""
tool_output = ""
had_error = False
for step in tool_sequence:
tool = step.get("tool", "")
args = step.get("args", {})
await self.hud("tool_call", tool=tool, args=args)
for step in tool_sequence:
tool = step.get("tool", "")
args = step.get("args", {})
await self.hud("tool_call", tool=tool, args=args)
if tool == "emit_actions":
actions.extend(args.get("actions", []))
elif tool == "set_state":
key = args.get("key", "")
if key:
state_updates[key] = args.get("value")
elif tool == "emit_display":
display_items.extend(args.get("items", []))
elif tool == "create_machine":
machine_ops.append({"op": "create", **args})
elif tool == "add_state":
machine_ops.append({"op": "add_state", **args})
elif tool == "reset_machine":
machine_ops.append({"op": "reset", **args})
elif tool == "destroy_machine":
machine_ops.append({"op": "destroy", **args})
elif tool == "query_db":
query = args.get("query", "")
database = args.get("database", self.default_database)
try:
result = await asyncio.to_thread(run_db_query, query, database)
tool_used = "query_db"
tool_output = result
await self.hud("tool_result", tool="query_db", output=result[:200])
except Exception as e:
tool_used = "query_db"
tool_output = f"Error: {e}"
await self.hud("tool_result", tool="query_db", output=str(e)[:200])
if tool == "emit_actions":
actions.extend(args.get("actions", []))
elif tool == "emit_card":
card = args.get("card", args)
card["type"] = "card"
display_items.append(card)
elif tool == "emit_list":
lst = args.get("list", args)
lst["type"] = "list"
display_items.append(lst)
elif tool == "set_state":
key = args.get("key", "")
if key:
state_updates[key] = args.get("value")
elif tool == "emit_display":
display_items.extend(args.get("items", []))
elif tool == "create_machine":
machine_ops.append({"op": "create", **args})
elif tool == "add_state":
machine_ops.append({"op": "add_state", **args})
elif tool == "reset_machine":
machine_ops.append({"op": "reset", **args})
elif tool == "destroy_machine":
machine_ops.append({"op": "destroy", **args})
elif tool == "update_machine":
machine_ops.append({"op": "update_data", **args})
elif tool == "transition_machine":
machine_ops.append({"op": "transition", **args})
elif tool == "emit_artifact":
import uuid
artifact = {
"id": args.get("id", str(uuid.uuid4())[:8]),
"type": args.get("type", "status"),
"data": args.get("data", {}),
"actions": args.get("actions", []),
"meta": args.get("meta", {}),
}
artifacts.append(artifact)
elif tool == "query_db":
query = args.get("query", "")
database = args.get("database", self.default_database)
try:
result = await asyncio.to_thread(run_db_query, query, database)
if result.startswith("Error:"):
err_entry = {"query": query, "error": result}
# Auto-DESCRIBE on column errors to help retry
if "Unknown column" in result or "1054" in result:
import re
# Extract table name from query
tables_in_query = re.findall(r'FROM\s+(\w+)|JOIN\s+(\w+)', query, re.IGNORECASE)
for match in tables_in_query:
tname = match[0] or match[1]
if tname:
try:
desc = await asyncio.to_thread(run_db_query, f"DESCRIBE {tname}", database)
err_entry["describe"] = f"{tname}: {desc[:300]}"
await self.hud("tool_result", tool="describe",
output=f"Auto-DESCRIBE {tname}")
except Exception:
pass
break
errors_so_far.append(err_entry)
had_error = True
await self.hud("tool_result", tool="query_db",
output=f"ERROR (attempt {attempt}): {result[:150]}")
break
tool_used = "query_db"
tool_output = result
await self.hud("tool_result", tool="query_db", output=result[:200])
except Exception as e:
errors_so_far.append({"query": query, "error": str(e)})
had_error = True
await self.hud("tool_result", tool="query_db",
output=f"ERROR (attempt {attempt}): {e}")
break
if not had_error:
break # success — stop retrying
log.info(f"[expert] attempt {attempt} failed, {len(errors_so_far)} errors")
# Generate response (with whatever we have — success or final error)
# Step 3: Generate response
results_text = ""
if tool_output:
results_text = f"Tool result:\n{tool_output[:500]}"
elif errors_so_far:
results_text = f"All {len(errors_so_far)} query attempts failed:\n"
for err in errors_so_far[-2:]:
results_text += f" {err['error'][:100]}\n"
resp_system = self.RESPONSE_SYSTEM
resp_system = resp_system.replace("{domain}", self.DOMAIN_SYSTEM)
resp_system = resp_system.replace("{job}", job)
resp_system = resp_system.replace("{results}", results_text)
resp_system = resp_system.replace("{language}", language)
resp_messages = [
{"role": "system", "content": resp_system},
{"role": "system", "content": self.RESPONSE_SYSTEM.format(
domain=self.DOMAIN_SYSTEM, job=job, results=results_text, language=language)},
{"role": "user", "content": job},
]
raw_response = await llm_call(self.model, resp_messages)
# Parse JSON response with optional card
response = raw_response or "[no response]"
try:
text = raw_response.strip()
if text.startswith("```"):
text = text.split("\n", 1)[1] if "\n" in text else text[3:]
if text.endswith("```"):
text = text[:-3]
text = text.strip()
resp_data = json.loads(text)
response = resp_data.get("text", raw_response)
if resp_data.get("artifact"):
# New: artifact in response JSON
art = resp_data["artifact"]
import uuid
if "id" not in art:
art["id"] = str(uuid.uuid4())[:8]
artifacts.append(art)
elif resp_data.get("card"):
card = resp_data["card"]
card["type"] = "card"
display_items.append(card)
except (json.JSONDecodeError, Exception):
pass # Use raw response as text
response = await llm_call(self.model, resp_messages)
if not response:
response = "[no response]"
await self.hud("done", response=response[:100])
@ -286,8 +158,6 @@ Rules:
state_updates=state_updates,
display_items=display_items,
machine_ops=machine_ops,
errors=errors_so_far,
artifacts=artifacts,
)
def _parse_plan(self, raw: str) -> tuple[list, str]:

View File

@ -22,7 +22,7 @@ Listener: {identity} on {channel}
Return ONLY valid JSON. No markdown, no explanation.
Schema:
{
{{
"who": "name or unknown",
"language": "en | de | mixed",
"intent": "question | request | social | action | feedback",
@ -30,7 +30,7 @@ Schema:
"tone": "casual | frustrated | playful | urgent",
"complexity": "trivial | simple | complex",
"context": "brief note or empty"
}
}}
Rules:
- Classify the CURRENT message only. Previous messages are context, not the target.
@ -53,11 +53,11 @@ Rules:
casual = neutral
Examples:
"hi there!" -> {"language":"en","intent":"social","tone":"casual","complexity":"trivial"}
"Wie spaet ist es?" -> {"language":"de","intent":"question","tone":"casual","complexity":"simple"}
"this is broken, nothing works" -> {"language":"en","intent":"feedback","tone":"frustrated","complexity":"simple"}
"create two buttons" -> {"language":"en","intent":"request","tone":"casual","complexity":"simple"}
"ok thanks bye" -> {"language":"en","intent":"social","tone":"casual","complexity":"trivial"}
"hi there!" -> {{"language":"en","intent":"social","tone":"casual","complexity":"trivial"}}
"Wie spaet ist es?" -> {{"language":"de","intent":"question","tone":"casual","complexity":"simple"}}
"this is broken, nothing works" -> {{"language":"en","intent":"feedback","tone":"frustrated","complexity":"simple"}}
"create two buttons" -> {{"language":"en","intent":"request","tone":"casual","complexity":"simple"}}
"ok thanks bye" -> {{"language":"en","intent":"social","tone":"casual","complexity":"trivial"}}
{memory_context}"""
@ -78,9 +78,8 @@ Examples:
history_summary = "Recent conversation:\n" + "\n".join(lines)
messages = [
{"role": "system", "content": self.SYSTEM.replace(
"{memory_context}", memory_context).replace(
"{identity}", identity).replace("{channel}", channel)},
{"role": "system", "content": self.SYSTEM.format(
memory_context=memory_context, identity=identity, channel=channel)},
]
if history_summary:
messages.append({"role": "user", "content": history_summary})

View File

@ -26,19 +26,6 @@ Given the conversation so far, output a JSON object with these fields:
- language: string primary language being used (en, de, mixed)
- style_hint: string how Output should talk (casual, formal, technical, poetic, etc.)
- facts: list of strings important facts learned about the user. NEVER drop facts from the existing list unless they are proven wrong. Always include all existing facts plus any new ones.
- user_expectation: string what the user expects the agent to do next. One of:
"conversational" default. User is chatting, asking questions, browsing. Normal back-and-forth.
"delegated" user gave an imperative task ("build X", "do Y", "create Z"). They expect autonomous progress, not clarifying questions.
"waiting_input" agent asked a question or presented choices. User's next message is likely an answer.
"observing" user returned after being idle, or is reviewing a large output. Brief responses, wait for explicit engagement.
Cues:
- Imperative verbs + task scope ("build", "create", "do", "find") delegated
- Agent ended with "Moment..." / thinking message but user hasn't seen full results yet → delegated (task still in progress)
- Short follow-ups like "und?", "ja?", "weiter?", "and?", "so?", "result?", "ergebnis?" waiting_input (user is waiting for the agent to deliver)
- Agent ended with a question ("Sollen wir...?", "Gibt es...?") waiting_input
- User said "ok/thanks/bye/danke" after output observing
- Everything else conversational
IMPORTANT: If the agent just delivered partial results or said "Moment..." and the user sends a short nudge, that is ALWAYS waiting_input, never conversational.
Output ONLY valid JSON. No explanation, no markdown fences."""
@ -53,7 +40,6 @@ Output ONLY valid JSON. No explanation, no markdown fences."""
"language": "en",
"style_hint": "casual, technical",
"facts": [],
"user_expectation": "conversational",
}
def get_context_block(self, sensor_lines: list[str] = None, ui_state: dict = None) -> str:

View File

@ -34,12 +34,6 @@ YOUR JOB: Transform the Thinker's reasoning into a natural, human-readable text
- Keep the user's language — if they wrote German, respond in German.
- Be concise. Don't describe data that the UI node will show as a table.
PHRASING by user_expectation (from memorizer):
- "delegated": progress-report style. State what was done and what's next. No questions unless blocked.
- "waiting_input": acknowledge the user's answer and continue the flow naturally.
- "observing": keep it brief. No unsolicited follow-up questions or suggestions.
- "conversational": natural, warm dialogue. Follow-ups are fine.
{memory_context}"""
async def process(self, thought: ThoughtResult, history: list[dict],
@ -48,7 +42,7 @@ PHRASING by user_expectation (from memorizer):
await self.hud("streaming")
messages = [
{"role": "system", "content": self.SYSTEM.replace("{memory_context}", memory_context)},
{"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
]
for msg in history[-20:]:
messages.append(msg)

View File

@ -27,8 +27,6 @@ Experts have these tools:
- query_db SQL queries on their domain database
- emit_actions create buttons on the dashboard
- create_machine / add_state / reset_machine / destroy_machine interactive UI components
- update_machine(id, data) update wizard data fields on existing machine
- transition_machine(id, target) move machine to a specific state
- set_state persistent key-value store
- emit_display formatted data display
@ -38,13 +36,13 @@ YOUR JOB:
3. Only respond directly for social chat (greetings, thanks, bye, small talk)
Output ONLY valid JSON:
{
{{
"expert": "{expert_names} | none",
"job": "Self-contained task. Include ALL context — the expert has NO conversation history. Describe what to query, what UI to build, what the user expects to see.",
"thinking_message": "Short message for user while expert works, in their language",
"response_hint": "If expert=none, your direct response to the user.",
"language": "de | en | mixed"
}
}}
Rules:
- expert=none ONLY for social chat (hi, thanks, bye, how are you)
@ -55,21 +53,11 @@ Rules:
- thinking_message: natural, in user's language. e.g. "Moment, ich schaue nach..."
- If the user mentions data, tables, customers, devices, buttons, counters expert
- When unsure which expert: pick the one whose domain matches best
- MACHINE STATE: If there are active machines/wizards listed in the context below, ALWAYS include the machine's current state and stored data in the job. The expert needs this to continue the workflow. Example: "Machine 'angebot_wizard' is on step 'select_age', data: {bundesland: Bayern}. User asks: ..."
- If the user asks about their wizard/workflow progress and the info is already visible in the context, respond directly (expert=none) using the machine state from context. Only route to expert if the user needs data queried or tools called.
- For update_machine / transition_machine requests: route to expert with the machine ID and operation details in the job.
USER EXPECTATION (from memorizer):
- If user_expectation is "delegated": formulate comprehensive, autonomous jobs. Do NOT include clarifying questions in the job. Tell the expert to proceed and report results.
- If user_expectation is "waiting_input": the user is waiting for results or nudging ("und?", "ja?", "weiter?"). Look at conversation history to find what they were waiting for and re-formulate that job. If they answered a question you asked, extract their answer and fold it into context.
- If user_expectation is "observing": only route to expert if the user explicitly asks for something. Otherwise respond directly with brief acknowledgment.
- If user_expectation is "conversational": normal routing behavior.
- CONTINUATION: When user sends a very short message (1-3 words like "und?", "weiter", "ja") after partial/incomplete results, treat it as "continue the previous task". Include the original question and any partial results in the job.
{memory_context}"""
EXPERT_DESCRIPTIONS = {
"eras": "eras — Heizkostenabrechnung (German heating cost billing). Users are Hausverwaltungen managing Kunden, Objekte (buildings), Nutzeinheiten (apartments), Geraete (meters), Verbraeuche (readings), Abrechnungen (billings), Auftraege (work orders). Hierarchy: Kunde > Objekte > Nutzeinheiten > Geraete > Verbraeuche. Database: eras2_production. Can also build dashboard UI.",
"eras": "eras — heating/energy domain. Database: eras2_production (customers, devices, billing, consumption). Can also build dashboard UI (buttons, machines, counters, tables) for energy data workflows.",
"plankiste": "plankiste — Kita planning domain. Database: plankiste_test (children, care schedules, offers, pricing). Can build dashboard UI for education workflows and generate Angebote.",
}
@ -101,15 +89,10 @@ USER EXPECTATION (from memorizer):
expert_lines.append("- (no experts available — handle everything directly)")
expert_names = " | ".join(self._available_experts) if self._available_experts else "none"
# Manual substitution to avoid .format() breaking on curly braces in memory_context
system_content = self.SYSTEM
system_content = system_content.replace("{memory_context}", memory_context)
system_content = system_content.replace("{identity}", identity)
system_content = system_content.replace("{channel}", channel)
system_content = system_content.replace("{experts}", "\n".join(expert_lines))
system_content = system_content.replace("{expert_names}", expert_names)
messages = [
{"role": "system", "content": system_content},
{"role": "system", "content": self.SYSTEM.format(
memory_context=memory_context, identity=identity, channel=channel,
experts="\n".join(expert_lines), expert_names=expert_names)},
]
# Summarize recent history (PA sees full context)
@ -135,7 +118,7 @@ USER EXPECTATION (from memorizer):
log.info(f"[pa] raw: {raw[:300]}")
routing = self._parse_routing(raw, command)
await self.hud("routed", expert=routing.expert, job=(routing.job or "")[:100],
await self.hud("routed", expert=routing.expert, job=routing.job[:100],
direct=routing.expert == "none")
# Update directive style based on tone
@ -148,72 +131,6 @@ USER EXPECTATION (from memorizer):
return routing
async def route_retry(self, command: Command, history: list[dict],
memory_context: str = "", identity: str = "unknown",
channel: str = "unknown", original_job: str = "",
errors: list = None) -> PARouting:
"""Re-route after expert failure. PA reformulates with error context.

Builds a retry prompt from the original job and the most recent expert
errors, sends it to the LLM together with the system prompt and a short
history summary, and parses the reply with ``_parse_routing``.

Args:
    command: Passed through to ``_parse_routing`` together with the raw reply.
    history: Conversation messages; only the last 8 are summarized, each
        content truncated to 200 characters.
    memory_context, identity, channel: Substituted into the matching
        ``{...}`` placeholders of ``self.SYSTEM``.
    original_job: The job text that failed; embedded verbatim in the prompt.
    errors: Dicts read via the keys ``query``, ``error`` and (optionally)
        ``describe``. ``None`` is treated as an empty list.

Returns:
    The ``PARouting`` produced by ``_parse_routing`` for the LLM reply.
"""
await self.hud("thinking", detail="reformulating after expert failure")
# Only the three most recent errors go into the prompt; query/error text is
# capped at 100 characters and the schema description at 200.
error_lines = []
for err in (errors or [])[-3:]:
error_lines.append(f"- Query: {err.get('query', '?')[:100]}")
error_lines.append(f" Error: {err.get('error', '?')[:100]}")
if err.get("describe"):
error_lines.append(f" Schema: {err['describe'][:200]}")
# chr(10) is "\n": the error lines are joined one per line inside the prompt.
retry_prompt = f"""The expert FAILED the previous job. You must reformulate.
ORIGINAL JOB: {original_job}
ERRORS:
{chr(10).join(error_lines)}
REFORMULATE the job with a DIFFERENT approach:
- If the query was too complex (JOINs, window functions), break it into simpler steps
- If columns were wrong, use the DESCRIBE info above to fix them
- If the table structure is unclear, tell the expert to first explore with SELECT * LIMIT 5
- Think about what data the user actually needs and find a simpler path to it
Output the same JSON format as before. The job MUST be different from the original."""
# One description line per available expert; unknown names get a generic label.
expert_lines = []
for name in self._available_experts:
desc = self.EXPERT_DESCRIPTIONS.get(name, f"{name} — domain expert")
expert_lines.append(f"- {desc}")
expert_names = " | ".join(self._available_experts) if self._available_experts else "none"
# Placeholders are filled with chained str.replace calls, not str.format.
system_content = self.SYSTEM
system_content = system_content.replace("{memory_context}", memory_context)
system_content = system_content.replace("{identity}", identity)
system_content = system_content.replace("{channel}", channel)
system_content = system_content.replace("{experts}", "\n".join(expert_lines))
system_content = system_content.replace("{expert_names}", expert_names)
messages = [
{"role": "system", "content": system_content},
]
# Recent history is condensed into a single user message, followed by a
# canned assistant acknowledgement, before the retry prompt itself.
recent = history[-8:]
if recent:
lines = []
for msg in recent:
role = msg.get("role", "?")
content = msg.get("content", "")[:200]
lines.append(f" {role}: {content}")
messages.append({"role": "user", "content": "Recent conversation:\n" + "\n".join(lines)})
messages.append({"role": "assistant", "content": "OK, I have the context."})
messages.append({"role": "user", "content": retry_prompt})
messages = self.trim_context(messages)
raw = await llm_call(self.model, messages)
log.info(f"[pa] retry raw: {raw[:300]}")
routing = self._parse_routing(raw, command)
# `routing.job or ""` guards the slice against a None job.
await self.hud("routed", expert=routing.expert, job=(routing.job or "")[:100],
direct=routing.expert == "none", retry=True)
return routing
def _parse_routing(self, raw: str, command: Command) -> PARouting:
"""Parse LLM JSON into PARouting with fallback."""
text = raw.strip()
@ -232,10 +149,10 @@ Output the same JSON format as before. The job MUST be different from the origin
expert = "none"
return PARouting(
expert=expert,
job=data.get("job") or "",
thinking_message=data.get("thinking_message") or "",
response_hint=data.get("response_hint") or "",
language=data.get("language") or command.analysis.language,
job=data.get("job", ""),
thinking_message=data.get("thinking_message", ""),
response_hint=data.get("response_hint", ""),
language=data.get("language", command.analysis.language),
)
except (json.JSONDecodeError, Exception) as e:
log.error(f"[pa] parse failed: {e}, raw: {text[:200]}")

View File

@ -236,7 +236,7 @@ You are one node in a pipeline: Input (perceives) -> You (reason) -> Output (spe
1. emit_actions() show buttons. Button clicks come back as "ACTION: action_name".
Stateful buttons: include var/op in payload (inc/dec/set/toggle). UI handles locally.
Example: label:"+1", action:"increment", payload:{"var":"count","op":"inc","initial":0}
Example: label:"+1", action:"increment", payload:{{"var":"count","op":"inc","initial":0}}
2. set_state(key, value) persistent key-value store shown as live labels.
Survives across turns. Use for tracking mode, progress, flags.
@ -253,9 +253,9 @@ You are one node in a pipeline: Input (perceives) -> You (reason) -> Output (spe
destroy_machine(id) remove machine from dashboard.
Example navigation menu:
create_machine(id="nav", initial="main", states=[
{"name":"main","buttons":[{"label":"Menu 1","action":"menu_1","go":"sub1"},{"label":"Menu 2","action":"menu_2","go":"sub2"}],"content":["Welcome"]},
{"name":"sub1","buttons":[{"label":"Back","action":"back","go":"main"}],"content":["Sub 1 details"]},
{"name":"sub2","buttons":[{"label":"Back","action":"back","go":"main"}],"content":["Sub 2 details"]}
{{"name":"main","buttons":[{{"label":"Menu 1","action":"menu_1","go":"sub1"}},{{"label":"Menu 2","action":"menu_2","go":"sub2"}}],"content":["Welcome"]}},
{{"name":"sub1","buttons":[{{"label":"Back","action":"back","go":"main"}}],"content":["Sub 1 details"]}},
{{"name":"sub2","buttons":[{{"label":"Back","action":"back","go":"main"}}],"content":["Sub 2 details"]}}
])
PREFER machines over emit_actions for anything with navigation or multiple views.
ALWAYS include states when creating a machine. Never write code use the tool.
@ -350,10 +350,10 @@ conn.commit()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
tables = cursor.fetchall()
for t in tables:
cursor.execute(f"SELECT * FROM {t[0]}")
cursor.execute(f"SELECT * FROM {{t[0]}}")
rows = cursor.fetchall()
cols = [d[0] for d in cursor.description]
print(f"Table: {t[0]}")
print(f"Table: {{t[0]}}")
print(" | ".join(cols))
for row in rows:
print(" | ".join(str(c) for c in row))
@ -446,7 +446,7 @@ conn.close()'''
await self.hud("thinking", detail="reasoning about response")
messages = [
{"role": "system", "content": self.SYSTEM.replace("{memory_context}", memory_context)},
{"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
]
for msg in history[-12:]:
messages.append(msg)

View File

@ -88,7 +88,7 @@ Rules:
hint += f"\nTool result:\n{tool_output[:500]}"
messages = [
{"role": "system", "content": self.RESPONSE_SYSTEM.replace("{hint}", hint)},
{"role": "system", "content": self.RESPONSE_SYSTEM.format(hint=hint)},
]
for msg in history[-8:]:
messages.append(msg)

View File

@ -2,10 +2,9 @@
import json
import logging
import uuid
from .base import Node
from ..types import ThoughtResult, Artifact
from ..types import ThoughtResult
log = logging.getLogger("runtime")
@ -17,7 +16,6 @@ class UINode(Node):
def __init__(self, send_hud):
super().__init__(send_hud)
self.thinker_controls: list[dict] = [] # buttons, labels, tables from Thinker
self.artifacts: list[dict] = [] # typed workspace artifacts
self.state: dict = {} # {"count": 0, "theme": "dark", ...}
self.bindings: dict = {} # {"increment": {"op": "inc", "var": "count"}, ...}
self.machines: dict = {} # {"nav": {initial, states, current}, ...}
@ -81,7 +79,6 @@ class UINode(Node):
"initial": initial,
"current": initial,
"states": states,
"data": {}, # wizard field storage (e.g. {"bundesland": "Bayern"})
}
log.info(f"[ui] machine created: {mid} (initial={initial}, {len(states)} states)")
await self.hud("machine_created", id=mid, initial=initial, state_count=len(states))
@ -107,28 +104,6 @@ class UINode(Node):
log.info(f"[ui] machine reset: {mid} -> {initial}")
await self.hud("machine_reset", id=mid, state=initial)
elif op == "update_data":
if mid not in self.machines:
log.warning(f"[ui] update_data: machine '{mid}' not found")
continue
data_update = op_data.get("data", {})
self.machines[mid]["data"].update(data_update)
log.info(f"[ui] machine data updated: {mid} += {data_update}")
await self.hud("machine_data_updated", id=mid, data=data_update)
elif op == "transition":
if mid not in self.machines:
log.warning(f"[ui] transition: machine '{mid}' not found")
continue
target = op_data.get("target", "")
if target in self.machines[mid]["states"]:
old = self.machines[mid]["current"]
self.machines[mid]["current"] = target
log.info(f"[ui] machine transition (expert): {mid} {old} -> {target}")
await self.hud("machine_transitioned", id=mid, old=old, target=target)
else:
log.warning(f"[ui] transition target '{target}' not found in {mid}")
elif op == "destroy":
if mid in self.machines:
del self.machines[mid]
@ -182,31 +157,15 @@ class UINode(Node):
return controls
def get_machine_summary(self) -> str:
"""Rich summary for PA/Thinker context — includes current state details and stored data."""
"""Summary for Thinker context — shape only, not full data."""
if not self.machines:
return ""
parts = []
for mid, m in self.machines.items():
current = m["current"]
state_names = list(m["states"].keys())
state_def = m["states"].get(current, {})
line = f" machine '{mid}': state={current}, states={state_names}"
# Current state content
content = state_def.get("content", [])
if content:
line += f", content={content}"
# Current state buttons
buttons = state_def.get("buttons", [])
if buttons:
btn_labels = [b.get("label", b.get("action", "?")) for b in buttons if isinstance(b, dict)]
if btn_labels:
line += f", buttons={btn_labels}"
# Stored wizard data
data = m.get("data", {})
if data:
line += f", data={data}"
parts.append(line)
return "Active machines (interactive wizard/workflow state):\n" + "\n".join(parts)
parts.append(f" machine '{mid}': state={current}, states={state_names}")
return "Machines:\n" + "\n".join(parts)
# --- State operations ---
@ -347,21 +306,16 @@ class UINode(Node):
"value": str(value),
})
# 4. Add display items (cards, lists, or simple display)
# 4. Add display items from Thinker's emit_display() calls
if thought.display_items:
for item in thought.display_items:
item_type = item.get("type", "text")
if item_type in ("card", "list"):
# Pass through structured components as-is
controls.append(item)
else:
controls.append({
"type": "display",
"display_type": item_type,
"label": item.get("label", ""),
"value": item.get("value", ""),
"style": item.get("style", ""),
})
controls.append({
"type": "display",
"display_type": item.get("type", "text"),
"label": item.get("label", ""),
"value": item.get("value", ""),
"style": item.get("style", ""),
})
# 5. Extract tables from tool output
if thought.tool_output:
@ -384,155 +338,21 @@ class UINode(Node):
return controls
def _build_artifacts(self, thought: ThoughtResult) -> list[dict]:
"""Convert ThoughtResult into typed artifacts.

Collects, in order: artifacts passed in directly on ``thought.artifacts``,
converted display items, an ``action_bar``, a ``data_table`` extracted from
tool output, one ``status`` entry per state variable, and one ``machine``
entry per registered machine. Every artifact built here is a dict with the
keys ``id``, ``type``, ``data``, ``actions`` and ``meta``; directly passed
artifacts are appended as received.

Side effects: artifacts in ``thought.artifacts`` that lack an ``id`` get
one assigned in place, and ``thought.state_updates`` are applied through
``self.set_var``.
"""
arts = []
# 1. Direct artifacts from expert's emit_artifact calls
if thought.artifacts:
for a in thought.artifacts:
if not a.get("id"):
# Mutates the incoming dict: an 8-character id cut from a uuid4 string.
a["id"] = str(uuid.uuid4())[:8]
arts.append(a)
# 2. Convert display_items (cards, lists) → entity_detail artifacts
if thought.display_items:
for item in thought.display_items:
item_type = item.get("type", "text")
if item_type == "card":
arts.append({
"id": str(uuid.uuid4())[:8],
"type": "entity_detail",
"data": {
"title": item.get("title", ""),
"subtitle": item.get("subtitle", ""),
"fields": item.get("fields", []),
},
"actions": item.get("actions", []),
"meta": {},
})
elif item_type == "list":
arts.append({
"id": str(uuid.uuid4())[:8],
"type": "entity_detail",
"data": {
"title": item.get("title", ""),
"items": item.get("items", []),
},
"actions": [],
"meta": {"list": True},
})
else:
# Any other display type becomes a generic status artifact.
arts.append({
"id": str(uuid.uuid4())[:8],
"type": "status",
"data": {
"display_type": item_type,
"label": item.get("label", ""),
"value": item.get("value", ""),
"style": item.get("style", ""),
},
"actions": [],
"meta": {},
})
# 3. Convert actions → action_bar artifact
# The action bar uses the fixed id "action_bar" rather than a random one.
if thought.actions:
btns = self._parse_thinker_actions(thought.actions)
arts.append({
"id": "action_bar",
"type": "action_bar",
"data": {},
"actions": [{"label": b["label"], "action": b["action"],
"payload": b.get("payload", {})} for b in btns],
"meta": {},
})
elif self.thinker_controls:
# Preserve existing buttons as action_bar
existing_btns = [c for c in self.thinker_controls if c.get("type") == "button"]
if existing_btns:
arts.append({
"id": "action_bar",
"type": "action_bar",
"data": {},
"actions": [{"label": b["label"], "action": b["action"],
"payload": b.get("payload", {})} for b in existing_btns],
"meta": {},
})
# 4. Convert tool_output table → data_table artifact
if thought.tool_output:
table = self._extract_table(thought.tool_output)
if table:
arts.append({
"id": str(uuid.uuid4())[:8],
"type": "data_table",
"data": {
"columns": table["columns"],
"rows": table["data"],
},
"actions": [],
# Falls back to "query_db" as the source when no tool name is recorded.
"meta": {"source": thought.tool_used or "query_db"},
})
# 5. State variables → status artifacts
if thought.state_updates:
for key, value in thought.state_updates.items():
self.set_var(key, value)
# NOTE(review): confirm whether this loop is meant to run only when
# state_updates is non-empty or for every call.
for var, value in self.state.items():
arts.append({
"id": f"state_{var}",
"type": "status",
"data": {"label": var, "value": str(value), "display_type": "text"},
"actions": [],
"meta": {"state_var": True},
})
# 6. Machines → machine artifacts
for mid, machine in self.machines.items():
current = machine["current"]
state_def = machine["states"].get(current, {})
arts.append({
"id": f"machine_{mid}",
"type": "machine",
"data": {
"machine_id": mid,
"current": current,
"states": list(machine["states"].keys()),
"content": state_def.get("content", []),
"stored_data": machine.get("data", {}),
},
# Non-dict button entries are skipped.
"actions": [{"label": b.get("label", ""), "action": b.get("action", ""),
"go": b.get("go", "")}
for b in state_def.get("buttons", []) if isinstance(b, dict)],
"meta": {"live": True},
})
return arts
def get_artifacts(self) -> list[dict]:
"""Return current artifact list.

The list stored on ``self.artifacts`` is returned directly (not a copy).
"""
return self.artifacts
async def process(self, thought: ThoughtResult, history: list[dict],
memory_context: str = "") -> list[dict]:
# Apply machine ops first (create/add_state/reset/destroy)
if thought.machine_ops:
await self.apply_machine_ops(thought.machine_ops)
# Build artifacts (new system)
self.artifacts = self._build_artifacts(thought)
# Build legacy controls (backward compat)
thinker_ctrls = self._build_controls(thought)
if thinker_ctrls:
self.thinker_controls = thinker_ctrls
# Always emit the merged view (thinker + machine)
merged = self.current_controls
if merged or self.artifacts:
if merged:
await self.hud("controls", controls=merged)
log.info(f"[ui] emitting {len(merged)} controls + {len(self.artifacts)} artifacts")
log.info(f"[ui] emitting {len(merged)} controls ({len(self.thinker_controls)} thinker + {len(self.get_machine_controls())} machine)")
else:
await self.hud("decided", instruction="no new controls")

View File

@ -17,7 +17,7 @@ log = logging.getLogger("runtime")
TRACE_FILE = Path(__file__).parent.parent / "trace.jsonl"
# Default graph — can be switched at runtime
_active_graph_name = "v4-eras"
_active_graph_name = "v1-current"
class OutputSink:
@ -56,13 +56,6 @@ class OutputSink:
except Exception:
pass
async def send_artifacts(self, artifacts: list):
"""Send ``artifacts`` over ``self.ws`` as a JSON message of type "artifacts".

Does nothing when ``self.ws`` is falsy. Delivery is best-effort: any
exception raised while serializing or sending is swallowed.
"""
if self.ws:
try:
await self.ws.send_text(json.dumps({"type": "artifacts", "artifacts": artifacts}))
except Exception:
# Best-effort send: failures are deliberately ignored.
pass
async def send_hud(self, data: dict):
if self.ws:
try:
@ -228,10 +221,9 @@ class Runtime:
self.history.append({"role": "user", "content": action_desc})
sensor_lines = self.sensor.get_context_lines()
director_line = self.director.get_context_line() if self.director else ""
director_line = self.director.get_context_line()
mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines, ui_state=self.ui_node.state)
if director_line:
mem_ctx += f"\n\n{director_line}"
mem_ctx += f"\n\n{director_line}"
command = Command(
analysis=InputAnalysis(intent="action", topic=action, complexity="simple"),
@ -250,7 +242,7 @@ class Runtime:
self.history.append({"role": "assistant", "content": response})
await self.memorizer.update(self.history)
if not self.is_v2 and self.director:
if not self.is_v2:
await self.director.update(self.history, self.memorizer.state)
if len(self.history) > self.MAX_HISTORY:
@ -327,10 +319,9 @@ class Runtime:
# Check Sensor flags (idle return, workspace mismatch)
sensor_flags = self.sensor.consume_flags()
sensor_lines = self.sensor.get_context_lines()
director_line = self.director.get_context_line() if self.director else ""
director_line = self.director.get_context_line()
mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines, ui_state=self.ui_node.state)
if director_line:
mem_ctx += f"\n\n{director_line}"
mem_ctx += f"\n\n{director_line}"
machine_summary = self.ui_node.get_machine_summary()
if machine_summary:
mem_ctx += f"\n\n{machine_summary}"

View File

@ -76,19 +76,6 @@ class PARouting:
language: str = "de" # Response language
@dataclass
class Artifact:
"""A typed workspace item. The unit of workspace content."""
id: str # unique ID
type: str # entity_detail | data_table | document_page | action_bar | status | machine
data: dict = field(default_factory=dict) # type-specific payload
actions: list = field(default_factory=list) # [{label, action, payload?}]
meta: dict = field(default_factory=dict) # {entity?, related?, source_query?}
def to_dict(self) -> dict:
"""Return this artifact as a plain dict (via ``asdict``)."""
return asdict(self)
@dataclass
class ThoughtResult:
"""Thinker node's output — either a direct answer or tool results."""
@ -99,5 +86,3 @@ class ThoughtResult:
state_updates: dict = field(default_factory=dict) # {key: value} from set_state
display_items: list = field(default_factory=list) # [{type, label, value?, style?}] from emit_display
machine_ops: list = field(default_factory=list) # [{op, id, ...}] from machine tools
errors: list = field(default_factory=list) # [{query, error, describe?}] from failed retries
artifacts: list = field(default_factory=list) # [Artifact] from emit_artifact

View File

@ -255,24 +255,14 @@ def check_actions(actions: list, check: str) -> tuple[bool, str]:
return True, f"{len(actions)} actions >= {expected}"
return False, f"{len(actions)} actions < {expected}"
# has TYPE or has TYPE1 or TYPE2
m = re.match(r'has\s+(.+)', check)
if m:
types = [t.strip() for t in m.group(1).split(" or has ")]
# Also handle "card or has table" → ["card", "table"]
types = [t.replace("has ", "") for t in types]
# has table
if check.strip() == "has table":
for a in actions:
if isinstance(a, dict) and a.get("type") in types:
atype = a.get("type")
if atype == "table":
return True, f"table found: {len(a.get('columns', []))} cols, {len(a.get('data', []))} rows"
elif atype == "card":
return True, f"card found: {a.get('title', '?')}, {len(a.get('fields', []))} fields"
elif atype == "list":
return True, f"list found: {a.get('title', '?')}, {len(a.get('items', []))} items"
else:
return True, f"{atype} found"
return False, f"no {' or '.join(types)} in {len(actions)} controls ({[a.get('type','?') for a in actions if isinstance(a, dict)]})"
if isinstance(a, dict) and a.get("type") == "table":
cols = a.get("columns", [])
rows = len(a.get("data", []))
return True, f"table found: {len(cols)} cols, {rows} rows"
return False, f"no table in {len(actions)} controls"
# any action contains "foo" or "bar" — searches buttons only
m = re.match(r'any action contains\s+"?(.+?)"?\s*$', check)
@ -382,12 +372,6 @@ def check_trace(trace: list, check: str) -> tuple[bool, str]:
return True, f"found reset_machine via machine_reset event"
if t.get("event") == "machine_destroyed" and tool_name == "destroy_machine":
return True, f"found destroy_machine via machine_destroyed event"
if t.get("event") == "machine_data_updated" and tool_name == "update_machine":
return True, f"found update_machine via machine_data_updated event"
if t.get("event") == "machine_transitioned" and tool_name == "transition_machine":
return True, f"found transition_machine via machine_transitioned event"
if t.get("event") == "pa_retry" and tool_name == "pa_retry":
return True, f"found pa_retry event"
return False, f"no tool_call '{tool_name}' in trace"
# machine_created id="NAV" — checks for specific machine creation

View File

@ -953,24 +953,6 @@ function send() {
inputEl.value = '';
}
/**
 * Ask the server to reset the session, then wipe the local chat, trace,
 * dashboard and dock so the UI matches the cleared server state.
 * Any failure (network error or non-2xx response) is reported in the trace
 * panel and leaves the current UI untouched.
 */
async function clearSession() {
  try {
    const headers = { 'Content-Type': 'application/json' };
    if (authToken) headers['Authorization'] = 'Bearer ' + authToken;
    const res = await fetch('/api/clear', { method: 'POST', headers });
    // fetch() only rejects on network failure; an HTTP error status (401, 500, ...)
    // still resolves, so check it before discarding the local view.
    if (!res.ok) throw new Error('HTTP ' + res.status);
    // Clear UI
    msgs.innerHTML = '';
    traceEl.innerHTML = '';
    _currentDashboard = [];
    currentEl = null;
    const dock = document.getElementById('dock');
    if (dock) dock.innerHTML = '';
    addTrace('runtime', 'cleared', 'session reset');
  } catch (e) {
    addTrace('runtime', 'error', 'clear failed: ' + e);
  }
}
// --- Awareness panel updates ---
let _sensorReadings = {};

View File

@ -16,8 +16,6 @@
<h1>cog</h1>
<div id="test-status"></div>
<div style="flex:1"></div>
<div id="graph-switcher"></div>
<button onclick="clearSession()" class="btn-top" title="Clear session">Clear</button>
<div id="status">disconnected</div>
</div>
@ -29,7 +27,17 @@
</div>
<div class="panel detail-panel">
<div class="panel-header detail-h">Nodes</div>
<div id="node-metrics"></div>
<div id="node-metrics">
<div class="node-meter" id="meter-input"><span class="nm-label">input</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-director_v2"><span class="nm-label">director</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-pa_v1"><span class="nm-label">PA</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-thinker"><span class="nm-label">thinker</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-eras_expert"><span class="nm-label">eras</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-output"><span class="nm-label">output</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-memorizer"><span class="nm-label">memo</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-interpreter"><span class="nm-label">interp</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
<div class="node-meter" id="meter-sensor"><span class="nm-label">sensor</span><span class="nm-text" style="flex:1"></span></div>
</div>
</div>
<div class="panel graph-panel">
<div class="panel-header graph-h">Graph
@ -50,6 +58,7 @@
<div id="input-bar">
<input id="input" placeholder="Type a message..." autocomplete="off">
<button onclick="send()">Send</button>
<button onclick="clearSession()" class="btn-clear" title="Clear session">&#x2715;</button>
</div>
</div>
<div class="panel awareness-panel">

View File

@ -1,178 +1,15 @@
/** Awareness panel: memorizer state, sensor readings.
* Node detail panel: per-node model, tokens, progress, last event.
*/
/** Awareness panel: memorizer state, sensor readings, node meters. */
import { esc, truncate } from './util.js';
let _sensorReadings = {};
// --- Node state tracker ---
const _nodeState = {}; // { nodeName: { model, tokens, maxTokens, fillPct, lastEvent, lastDetail, status, toolCalls, startedAt } }
// Collapse versioned / prefixed node names onto one key so a node is not
// tracked twice (pa_v1 → pa, expert_eras → eras, ...). Each marker is removed
// once, at its first occurrence, in the order listed.
function _normName(name) {
  const markers = ['_v1', '_v2', 'expert_'];
  return markers.reduce((current, marker) => current.replace(marker, ''), name);
}
// Look up the tracked state for a node under its normalized name, creating a
// fresh idle record on first access.
function _getNode(name) {
  const key = _normName(name);
  let entry = _nodeState[key];
  if (!entry) {
    entry = {
      model: '', tokens: 0, maxTokens: 0, fillPct: 0,
      lastEvent: '', lastDetail: '', status: 'idle',
      toolCalls: 0, lastTool: '',
    };
    _nodeState[key] = entry;
  }
  return entry;
}
/**
 * Fold one HUD event into the tracked state of `node`, then re-render the
 * node panel. Events not listed below leave the state unchanged but still
 * trigger a render.
 */
export function updateNodeFromHud(node, event, data) {
  const n = _getNode(node);

  switch (event) {
    case 'context':
      // Context events carry model / token budget info only.
      if (data.model) n.model = data.model.replace('google/', '').replace('anthropic/', '');
      if (data.tokens !== undefined) n.tokens = data.tokens;
      if (data.max_tokens !== undefined) n.maxTokens = data.max_tokens;
      if (data.fill_pct !== undefined) n.fillPct = data.fill_pct;
      break;

    case 'thinking':
      n.status = 'thinking';
      n.lastEvent = 'thinking';
      n.lastDetail = data.detail || '';
      break;

    case 'perceived': {
      n.status = 'done';
      n.lastEvent = 'perceived';
      const a = data.analysis || {};
      n.lastDetail = `${a.intent || '?'}/${a.language || '?'}/${a.tone || '?'}`;
      break;
    }

    case 'decided':
    case 'routed':
      n.status = 'done';
      n.lastEvent = event;
      n.lastDetail = data.goal || data.instruction || data.job || '';
      break;

    case 'tool_call':
      n.status = 'tool';
      n.lastEvent = 'tool_call';
      n.lastTool = data.tool || '';
      n.lastDetail = data.tool || '';
      n.toolCalls++;
      break;

    case 'tool_result':
      // Status is intentionally left as-is for tool results.
      n.lastEvent = 'tool_result';
      n.lastDetail = truncate(data.output || '', 50);
      break;

    case 'streaming':
      n.status = 'streaming';
      n.lastEvent = 'streaming';
      break;

    case 'done':
      n.status = 'done';
      n.lastEvent = 'done';
      break;

    case 'updated':
      n.status = 'done';
      n.lastEvent = 'updated';
      break;

    case 'planned':
      n.status = 'planned';
      n.lastEvent = 'planned';
      n.lastDetail = `${data.tools || 0} tools`;
      break;

    case 'interpreted':
      n.status = 'done';
      n.lastEvent = 'interpreted';
      n.lastDetail = truncate(data.summary || '', 50);
      break;

    default:
      break;
  }

  renderNodes();
}
// Fixed pipeline order — no re-sorting
// Fixed pipeline order using normalized names
// Display order for node cards, by normalized node name. renderNodes sorts by the
// index in this list; names that are not listed sort after all listed ones.
const PIPELINE_ORDER = ['input', 'pa', 'director', 'eras', 'plankiste',
'thinker', 'interpreter', 'output', 'memorizer', 'ui', 'sensor'];
/**
 * Rebuild the #node-metrics panel from the tracked node state.
 * Cards are ordered by PIPELINE_ORDER (unlisted nodes last); the 'runtime' and
 * 'frame_engine' entries are never shown. Does nothing if the panel is absent.
 */
function renderNodes() {
  const el = document.getElementById('node-metrics');
  if (!el) return;

  // Unknown nodes get a rank past every known pipeline stage.
  const rank = (name) => {
    const idx = PIPELINE_ORDER.indexOf(name);
    return idx === -1 ? 99 : idx;
  };
  const sorted = Object.entries(_nodeState)
    .filter(([name]) => name !== 'runtime' && name !== 'frame_engine')
    .sort((a, b) => rank(a[0]) - rank(b[0]));

  let html = '';
  for (const [name, n] of sorted) {
    const statusClass = n.status === 'thinking' || n.status === 'tool' ? 'nm-active'
      : n.status === 'streaming' ? 'nm-streaming' : '';
    const shortName = name.replace('_v1', '').replace('_v2', '').replace('expert_', '');
    const modelShort = n.model ? n.model.split('/').pop().replace('-001', '').replace('-4.5', '4.5') : '';
    const tokenStr = n.maxTokens ? `${n.tokens}/${n.maxTokens}t` : '';
    // fillPct comes from server payloads and lands inside a style attribute:
    // coerce to a number and clamp to 0..100 so it can neither break the
    // markup nor overflow the bar.
    const fillW = Math.min(100, Math.max(0, Number(n.fillPct) || 0));
    const detail = n.lastDetail ? truncate(n.lastDetail, 45) : '';
    const toolStr = n.toolCalls > 0 ? ` [${n.toolCalls} calls]` : '';
    html += [
      `<div class="node-card ${statusClass}">`,
      '<div class="nc-header">',
      `<span class="nc-name">${esc(shortName)}</span>`,
      `<span class="nc-model">${esc(modelShort)}</span>`,
      `<span class="nc-tokens">${esc(tokenStr)}</span>`,
      '</div>',
      `<div class="nc-bar"><div class="nc-fill" style="width:${fillW}%"></div></div>`,
      '<div class="nc-status">',
      `<span class="nc-event">${esc(n.lastEvent)}</span>`,
      `<span class="nc-detail">${esc(detail)}${esc(toolStr)}</span>`,
      '</div>',
      '</div>',
    ].join('\n');
  }
  el.innerHTML = html;
}
/**
 * Seed one idle node card per role in the graph definition, before any HUD
 * message has arrived, then render.
 * @param {object} graphData graph payload; reads `nodes` (keyed by role) and the
 *   optional `node_details` (per-role `model` / `max_tokens`)
 */
export function initNodesFromGraph(graphData) {
  const roles = Object.keys(graphData.nodes || {});
  const detailsByRole = graphData.node_details || {};

  roles.forEach((role) => {
    const rec = _getNode(role);
    const info = detailsByRole[role];
    if (info) {
      rec.model = (info.model || '').replace('google/', '').replace('anthropic/', '');
      rec.maxTokens = info.max_tokens || 0;
    }
    rec.lastEvent = 'idle';
    rec.status = 'idle';
  });

  renderNodes();
}
/** Forget all tracked node state and empty the #node-metrics panel if it exists. */
export function clearNodes() {
  Object.keys(_nodeState).forEach((key) => {
    delete _nodeState[key];
  });
  const panel = document.getElementById('node-metrics');
  if (panel) {
    panel.innerHTML = '';
  }
}
// Keep old meter function for backward compat (called from ws.js)
/**
 * Record token usage for a node and re-render the node cards.
 * @param {string} node      node name
 * @param {number} tokens    tokens currently used
 * @param {number} maxTokens token budget
 * @param {number} fillPct   fill percentage for the bar
 */
export function updateMeter(node, tokens, maxTokens, fillPct) {
  Object.assign(_getNode(node), { tokens, maxTokens, fillPct });
  renderNodes();
}
// --- Awareness: memorizer state ---
export function updateAwarenessState(state) {
const body = document.getElementById('aw-state-body');
if (!body) return;
const expectation = state.user_expectation || 'conversational';
const expClass = {
conversational: 'aw-exp-conv',
delegated: 'aw-exp-deleg',
waiting_input: 'aw-exp-wait',
observing: 'aw-exp-obs',
}[expectation] || '';
const display = [
['user', state.user_name],
['mood', state.user_mood],
['expectation', expectation, expClass],
['topic', state.topic],
['lang', state.language],
['style', state.style_hint],
@ -181,8 +18,8 @@ export function updateAwarenessState(state) {
const facts = state.facts || [];
const history = state.topic_history || [];
let html = display.map(([k, v, cls]) =>
`<div class="aw-row"><span class="aw-key">${esc(k)}</span><span class="aw-val ${cls || ''}">${esc(v || 'null')}</span></div>`
let html = display.map(([k, v]) =>
`<div class="aw-row"><span class="aw-key">${esc(k)}</span><span class="aw-val">${esc(v || 'null')}</span></div>`
).join('');
if (facts.length) {
@ -196,8 +33,6 @@ export function updateAwarenessState(state) {
body.innerHTML = html;
}
// --- Awareness: sensor readings ---
export function updateAwarenessSensors(tick, deltas) {
const body = document.getElementById('aw-sensor-body');
if (!body) return;
@ -211,3 +46,12 @@ export function updateAwarenessSensors(tick, deltas) {
}
body.innerHTML = html;
}
/**
 * Update the static meter row `#meter-<node>` with token usage.
 * The width is applied to the `.nm-fill` element (the coloured part inside the
 * `.nm-bar` track); sizing the track itself would leave the fill at its 0% default.
 * Rows without a bar (e.g. the sensor row) only get their text updated.
 * Silently does nothing when no meter exists for the node.
 * @param {string} node      node name used in the element id
 * @param {number} tokens    tokens currently used
 * @param {number} maxTokens token budget
 * @param {number} fillPct   fill percentage for the bar
 */
export function updateMeter(node, tokens, maxTokens, fillPct) {
  const meter = document.getElementById('meter-' + node);
  if (!meter) return;
  const fill = meter.querySelector('.nm-fill');
  const text = meter.querySelector('.nm-text');
  if (fill) fill.style.width = fillPct + '%';
  if (text) text.textContent = `${tokens}/${maxTokens}t`;
}

View File

@ -1,9 +1,6 @@
/** Dashboard: workspace artifact + control rendering.
* Artifact system: typed artifacts (entity_detail, data_table, document_page, action_bar, status, machine).
* Legacy: dockControls() still works as fallback for old control format.
*/
/** Dashboard: workspace controls rendering (buttons, tables, labels, displays, machines). */
import { esc, renderMarkdown } from './util.js';
import { esc } from './util.js';
import { addTrace } from './trace.js';
import { setDashboard } from './chat.js';
@ -11,233 +8,8 @@ let _ws = null;
export function setWs(ws) { _ws = ws; }
/**
 * Send a UI action over the dashboard WebSocket and log it to the trace.
 * Dropped silently when no socket is set or its readyState is not 1.
 * @param {string} action action identifier
 * @param {object} [data] payload; an empty object is sent when falsy
 */
function _sendAction(action, data) {
  if (!_ws || _ws.readyState !== 1) return;
  const message = { type: 'action', action, data: data || {} };
  _ws.send(JSON.stringify(message));
  addTrace('runtime', 'action', action);
}
// --- Artifact system ---
/**
 * Replace the workspace contents with one wrapper element per artifact.
 * Each artifact is drawn by the renderer registered for its `type`; unknown types
 * fall back to an escaped JSON dump of `data`. The artifacts' actions are also
 * flattened into button controls and handed to setDashboard.
 * Does nothing when #workspace-body is absent.
 * @param {Array<object>} artifacts artifact list from the server; a missing or
 *   non-array value is treated as empty and non-object entries are ignored
 */
export function dockArtifacts(artifacts) {
  const body = document.getElementById('workspace-body');
  if (!body) return;
  body.innerHTML = '';

  // The payload comes straight off the socket: tolerate a missing list and junk entries.
  const list = Array.isArray(artifacts)
    ? artifacts.filter((a) => a !== null && typeof a === 'object')
    : [];

  const container = document.createElement('div');
  container.className = 'artifacts-container';
  for (const art of list) {
    const wrapper = document.createElement('div');
    wrapper.className = 'ws-artifact ws-artifact-' + (art.type || 'unknown');
    wrapper.dataset.artifactId = art.id || '';
    // Own-property check so a type like "constructor" cannot resolve to an
    // inherited Object.prototype member and be called as a renderer.
    const renderer = Object.prototype.hasOwnProperty.call(RENDERERS, art.type)
      ? RENDERERS[art.type]
      : null;
    if (renderer) {
      renderer(wrapper, art);
    } else {
      wrapper.innerHTML = '<div class="ws-artifact-fallback">' + esc(JSON.stringify(art.data || {})) + '</div>';
    }
    container.appendChild(wrapper);
  }
  body.appendChild(container);

  // Also set dashboard for S3* audit (flatten actions from artifacts)
  const flatControls = list.flatMap((a) => (a.actions || []).map((act) => ({ type: 'button', ...act })));
  setDashboard(flatControls);
}
// --- Artifact renderers ---
// Maps an artifact's `type` string to the function that renders it into its wrapper
// element. Each renderer is called as renderer(wrapperElement, artifact).
const RENDERERS = {
entity_detail: renderEntityDetail,
data_table: renderDataTable,
document_page: renderDocumentPage,
action_bar: renderActionBar,
status: renderStatus,
machine: renderMachine,
};
/**
 * Render an `entity_detail` artifact: optional title/subtitle, an optional list of
 * nested item cards, the entity's own fields (a field with `action` becomes a
 * clickable link), and action buttons. All text is escaped before insertion.
 * @param {HTMLElement} el  wrapper element to fill
 * @param {object}      art artifact; reads `data` and `actions`
 */
function renderEntityDetail(el, art) {
  const d = art.data || {};
  const parts = [];

  if (d.title) parts.push(`<div class="ws-card-title">${esc(d.title)}</div>`);
  if (d.subtitle) parts.push(`<div class="ws-card-subtitle">${esc(d.subtitle)}</div>`);

  // List mode (multiple items)
  if (d.items && d.items.length) {
    parts.push('<div class="ws-list">');
    for (const item of d.items) {
      parts.push('<div class="ws-card ws-card-nested">');
      if (item.title) parts.push(`<div class="ws-card-title">${esc(item.title)}</div>`);
      if (item.fields) {
        parts.push('<div class="ws-card-fields">');
        for (const f of item.fields) {
          parts.push(
            `<div class="ws-card-field"><span class="ws-card-key">${esc(f.label || '')}</span>`
            + `<span class="ws-card-val">${esc(String(f.value ?? ''))}</span></div>`
          );
        }
        parts.push('</div>');
      }
      parts.push('</div>');
    }
    parts.push('</div>');
  }

  // Single entity fields
  if (d.fields && d.fields.length) {
    parts.push('<div class="ws-card-fields">');
    for (const f of d.fields) {
      const shown = esc(String(f.value ?? ''));
      let valueHtml;
      if (f.action) {
        valueHtml = `<span class="ws-card-link" data-action="${esc(f.action)}">${shown}</span>`;
      } else {
        valueHtml = `<span class="ws-card-val">${shown}</span>`;
      }
      parts.push(`<div class="ws-card-field"><span class="ws-card-key">${esc(f.label || '')}</span>${valueHtml}</div>`);
    }
    parts.push('</div>');
  }

  // Actions
  if (art.actions && art.actions.length) {
    parts.push('<div class="ws-card-actions">');
    for (const a of art.actions) {
      parts.push(`<button class="control-btn ws-card-btn" data-action="${esc(a.action || '')}">${esc(a.label || '')}</button>`);
    }
    parts.push('</div>');
  }

  el.innerHTML = parts.join('');
  _wireActions(el);
}
/**
 * Render a `data_table` artifact as a header line plus a <table>.
 * Rows are taken from `data.rows`, falling back to `data.data`. Columns come from
 * `data.columns`; when absent they are derived from the keys of the first row,
 * but only if that row is a plain object (array rows get no header row instead
 * of index numbers). Array rows are rendered cell by cell; object rows are
 * rendered by column, with missing values shown as empty. A null or primitive
 * row yields an empty <tr> rather than throwing.
 * @param {HTMLElement} el  wrapper element to append into
 * @param {object}      art artifact; reads `data.title`, `data.columns`,
 *   `data.rows` / `data.data`
 */
function renderDataTable(el, art) {
  const d = art.data || {};
  if (d.title) {
    const title = document.createElement('div');
    title.className = 'ws-artifact-header';
    title.textContent = d.title;
    el.appendChild(title);
  }

  const rows = d.rows || d.data || [];
  const isRecord = (v) => v !== null && typeof v === 'object' && !Array.isArray(v);
  // Derive columns from the same row source that is rendered below.
  const first = rows.length ? rows[0] : null;
  const cols = d.columns || (isRecord(first) ? Object.keys(first) : []);

  const table = document.createElement('table');
  table.className = 'control-table';
  if (cols.length) {
    const thead = document.createElement('tr');
    for (const col of cols) {
      const th = document.createElement('th');
      th.textContent = col;
      thead.appendChild(th);
    }
    table.appendChild(thead);
  }

  for (const row of rows) {
    const tr = document.createElement('tr');
    if (Array.isArray(row)) {
      for (const cell of row) {
        const td = document.createElement('td');
        td.textContent = cell;
        tr.appendChild(td);
      }
    } else if (isRecord(row)) {
      for (const col of cols) {
        const td = document.createElement('td');
        td.textContent = row[col] ?? '';
        tr.appendChild(td);
      }
    }
    table.appendChild(tr);
  }
  el.appendChild(table);
}
/**
 * Render a `document_page` artifact: optional title, then one block per section
 * (escaped heading, markdown-rendered content), then action buttons.
 * @param {HTMLElement} el  wrapper element to fill
 * @param {object}      art artifact; reads `data.title`, `data.sections`, `actions`
 */
function renderDocumentPage(el, art) {
  const d = art.data || {};
  const chunks = [];

  if (d.title) chunks.push(`<div class="ws-doc-title">${esc(d.title)}</div>`);

  for (const section of (d.sections || [])) {
    chunks.push('<div class="ws-doc-section">');
    if (section.heading) chunks.push(`<div class="ws-doc-heading">${esc(section.heading)}</div>`);
    if (section.content) chunks.push(`<div class="ws-doc-content">${renderMarkdown(section.content)}</div>`);
    chunks.push('</div>');
  }

  // Actions (e.g. PDF export)
  if (art.actions && art.actions.length) {
    chunks.push('<div class="ws-card-actions">');
    for (const a of art.actions) {
      chunks.push(`<button class="control-btn ws-card-btn" data-action="${esc(a.action || '')}">${esc(a.label || '')}</button>`);
    }
    chunks.push('</div>');
  }

  el.innerHTML = chunks.join('');
  _wireActions(el);
}
/**
 * Render an `action_bar` artifact: one button per entry in `art.actions`.
 * Clicking a button sends its `action` with its `payload` (or an empty object).
 * @param {HTMLElement} el  wrapper element to append buttons to
 * @param {object}      art artifact; reads `actions`
 */
function renderActionBar(el, art) {
  const actions = art.actions || [];
  actions.forEach((entry) => {
    const button = document.createElement('button');
    button.className = 'control-btn';
    button.textContent = entry.label || '';
    button.onclick = () => _sendAction(entry.action, entry.payload || {});
    el.appendChild(button);
  });
}
/**
 * Render a `status` artifact according to `data.display_type`:
 *  - 'progress': label, bar and percentage, with the value clamped to 0..100;
 *  - 'info': info icon plus label;
 *  - anything else (default 'text'): label plus the value when one is present.
 * A missing label renders as empty in every mode, and a value of 0 is shown in
 * text mode (only undefined, null and '' count as "no value").
 * @param {HTMLElement} el  wrapper element to fill
 * @param {object}      art artifact; reads `data.display_type`, `data.label`, `data.value`
 */
function renderStatus(el, art) {
  const d = art.data || {};
  const dt = d.display_type || 'text';
  const label = esc(d.label || '');
  el.classList.add('display-' + dt);
  if (dt === 'progress') {
    const pct = Math.min(100, Math.max(0, Number(d.value) || 0));
    el.innerHTML = '<span class="cd-label">' + label + '</span>'
      + '<div class="cd-bar"><div class="cd-fill" style="width:' + pct + '%"></div></div>'
      + '<span class="cd-pct">' + pct + '%</span>';
  } else if (dt === 'info') {
    el.innerHTML = '<span class="cd-icon">\u2139</span><span class="cd-label">' + label + '</span>';
  } else {
    // A plain truthiness test would hide a legitimate 0.
    const hasValue = d.value !== undefined && d.value !== null && d.value !== '';
    el.innerHTML = '<span class="cd-label">' + label + '</span>'
      + (hasValue ? '<span class="cd-value">' + esc(String(d.value)) + '</span>' : '');
  }
}
/**
 * Render a `machine` artifact: header with machine id and current state, content
 * lines, stored key=value data, and action buttons. All text is escaped.
 * @param {HTMLElement} el  wrapper element to fill
 * @param {object}      art artifact; reads `data.machine_id`, `data.current`,
 *   `data.content`, `data.stored_data`, `actions`
 */
function renderMachine(el, art) {
  const d = art.data || {};
  const machineId = d.machine_id || '';
  const out = [];

  // Header
  out.push(
    `<div class="ws-machine-header"><span class="ws-machine-name">${esc(machineId)}</span>`
    + `<span class="ws-machine-state">${esc(d.current || '')}</span></div>`
  );

  // Content
  for (const line of (d.content || [])) {
    out.push(`<div class="ws-machine-content">${esc(line)}</div>`);
  }

  // Stored data
  const storedEntries = Object.entries(d.stored_data || {});
  if (storedEntries.length) {
    out.push('<div class="ws-machine-data">');
    for (const [key, value] of storedEntries) {
      out.push(`<span class="ws-machine-datum">${esc(key)}=${esc(String(value))}</span>`);
    }
    out.push('</div>');
  }

  // Buttons
  if (art.actions && art.actions.length) {
    out.push('<div class="ws-card-actions">');
    for (const a of art.actions) {
      out.push(`<button class="control-btn ws-card-btn" data-action="${esc(a.action || '')}">${esc(a.label || '')}</button>`);
    }
    out.push('</div>');
  }

  el.innerHTML = out.join('');
  _wireActions(el);
}
// --- Helpers ---
/**
 * Attach click handlers to every `.ws-card-link` and `.ws-card-btn` inside `el`.
 * A click stops propagation and sends the element's `data-action` with an empty payload.
 * @param {HTMLElement} el root element whose descendants are wired
 */
function _wireActions(el) {
  const targets = el.querySelectorAll('.ws-card-link, .ws-card-btn');
  for (const target of targets) {
    target.onclick = (evt) => {
      evt.stopPropagation();
      _sendAction(target.dataset.action, {});
    };
  }
}
// --- Legacy control rendering (backward compat) ---
export function dockControls(controls) {
setDashboard(controls);
setDashboard(controls); // S3*: remember what's rendered
const body = document.getElementById('workspace-body');
if (!body) return;
body.innerHTML = '';
@ -249,7 +21,12 @@ export function dockControls(controls) {
const btn = document.createElement('button');
btn.className = 'control-btn';
btn.textContent = ctrl.label;
btn.onclick = () => _sendAction(ctrl.action, ctrl.payload || ctrl.data || {});
btn.onclick = () => {
if (_ws && _ws.readyState === 1) {
_ws.send(JSON.stringify({ type: 'action', action: ctrl.action, data: ctrl.payload || ctrl.data || {} }));
addTrace('runtime', 'action', ctrl.action);
}
};
container.appendChild(btn);
} else if (ctrl.type === 'table') {
const table = document.createElement('table');
@ -257,16 +34,22 @@ export function dockControls(controls) {
if (ctrl.columns) {
const thead = document.createElement('tr');
for (const col of ctrl.columns) {
const th = document.createElement('th'); th.textContent = col; thead.appendChild(th);
const th = document.createElement('th');
th.textContent = col;
thead.appendChild(th);
}
table.appendChild(thead);
}
for (const row of (ctrl.data || [])) {
const tr = document.createElement('tr');
if (Array.isArray(row)) {
for (const cell of row) { const td = document.createElement('td'); td.textContent = cell; tr.appendChild(td); }
for (const cell of row) {
const td = document.createElement('td'); td.textContent = cell; tr.appendChild(td);
}
} else if (typeof row === 'object') {
for (const col of (ctrl.columns || Object.keys(row))) { const td = document.createElement('td'); td.textContent = row[col] ?? ''; tr.appendChild(td); }
for (const col of (ctrl.columns || Object.keys(row))) {
const td = document.createElement('td'); td.textContent = row[col] ?? ''; tr.appendChild(td);
}
}
table.appendChild(tr);
}
@ -279,37 +62,21 @@ export function dockControls(controls) {
} else if (ctrl.type === 'display') {
const disp = document.createElement('div');
const dt = ctrl.display_type || 'text';
disp.className = 'control-display display-' + dt;
const style = ctrl.style ? ' display-' + ctrl.style : '';
disp.className = 'control-display display-' + dt + style;
if (dt === 'progress') {
const pct = Math.min(100, Math.max(0, Number(ctrl.value) || 0));
disp.innerHTML = '<span class="cd-label">' + esc(ctrl.label) + '</span><div class="cd-bar"><div class="cd-fill" style="width:' + pct + '%"></div></div><span class="cd-pct">' + pct + '%</span>';
disp.innerHTML = '<span class="cd-label">' + esc(ctrl.label) + '</span>'
+ '<div class="cd-bar"><div class="cd-fill" style="width:' + pct + '%"></div></div>'
+ '<span class="cd-pct">' + pct + '%</span>';
} else if (dt === 'status') {
disp.innerHTML = '<span class="cd-icon">' + (ctrl.style === 'success' ? '\u2713' : ctrl.style === 'error' ? '\u2717' : '\u2139') + '</span>'
+ '<span class="cd-label">' + esc(ctrl.label) + '</span>';
} else {
disp.innerHTML = '<span class="cd-label">' + esc(ctrl.label) + '</span>' + (ctrl.value ? '<span class="cd-value">' + esc(String(ctrl.value)) + '</span>' : '');
disp.innerHTML = '<span class="cd-label">' + esc(ctrl.label) + '</span>'
+ (ctrl.value ? '<span class="cd-value">' + esc(String(ctrl.value)) + '</span>' : '');
}
container.appendChild(disp);
} else if (ctrl.type === 'card') {
const card = document.createElement('div');
card.className = 'ws-card';
let html = '';
if (ctrl.title) html += '<div class="ws-card-title">' + esc(ctrl.title) + '</div>';
if (ctrl.subtitle) html += '<div class="ws-card-subtitle">' + esc(ctrl.subtitle) + '</div>';
if (ctrl.fields && ctrl.fields.length) {
html += '<div class="ws-card-fields">';
for (const f of ctrl.fields) {
html += '<div class="ws-card-field"><span class="ws-card-key">' + esc(f.label || '') + '</span><span class="ws-card-val">' + esc(String(f.value ?? '')) + '</span></div>';
}
html += '</div>';
}
if (ctrl.actions && ctrl.actions.length) {
html += '<div class="ws-card-actions">';
for (const a of ctrl.actions) {
html += '<button class="control-btn ws-card-btn" data-action="' + esc(a.action || '') + '">' + esc(a.label || '') + '</button>';
}
html += '</div>';
}
card.innerHTML = html;
_wireActions(card);
container.appendChild(card);
}
}
body.appendChild(container);

View File

@ -1,12 +1,7 @@
/** Pipeline graph: Cytoscape visualization + animation. */
import { initNodesFromGraph } from './awareness.js';
let cy = null;
let _dragEnabled = true;
// Maps HUD node names → graph node IDs (built from graph definition)
// e.g. {"eras_expert": "expert_eras", "pa_v1": "pa", "thinker_v2": "thinker"}
let _nodeNameToId = {};
let _physicsRunning = false;
let _physicsLayout = null;
let _colaSpacing = 25;
@ -95,13 +90,6 @@ export async function initGraph() {
if (resp.ok) {
const graph = await resp.json();
graphElements = buildGraphElements(graph, mx, cw, mid, row1, row2);
initNodesFromGraph(graph);
// Build HUD name → graph ID mapping: {impl_name: role}
_nodeNameToId = {};
for (const [role, impl] of Object.entries(graph.nodes || {})) {
_nodeNameToId[impl] = role; // "eras_expert" → "expert_eras"
_nodeNameToId[role] = role; // "expert_eras" → "expert_eras"
}
}
} catch (e) {}
@ -161,24 +149,6 @@ export async function initGraph() {
});
}
// --- Animation queue: batch rapid events, play sequentially ---
const _animQueue = [];
let _animRunning = false;
const ANIM_INTERVAL = 200; // ms between queued animations
/**
 * Queue an animation callback; starts draining the queue if it is not already running.
 * @param {Function} fn zero-argument animation step
 */
function _enqueue(fn) {
  _animQueue.push(fn);
  if (_animRunning) return;
  _flushQueue();
}
/**
 * Run the next queued animation and schedule the following one ANIM_INTERVAL ms later.
 * Marks the queue as idle and stops once it is empty.
 */
function _flushQueue() {
  _animRunning = _animQueue.length > 0;
  if (!_animRunning) return;
  const job = _animQueue.shift();
  job();
  setTimeout(_flushQueue, ANIM_INTERVAL);
}
function pulseNode(id) {
if (!cy) return;
const node = cy.getElementById(id);
@ -197,29 +167,29 @@ function flashEdge(sourceId, targetId) {
export function graphAnimate(event, node) {
if (!cy) return;
// Resolve HUD node name to graph ID (e.g. "eras_expert" → "expert_eras")
const graphId = _nodeNameToId[node] || node;
_enqueue(() => {
if (graphId && cy.getElementById(graphId).length) pulseNode(graphId);
if (node && cy.getElementById(node).length) pulseNode(node);
switch (event) {
case 'perceived': pulseNode('input'); flashEdge('user', 'input'); break;
case 'decided':
pulseNode(graphId); flashEdge(graphId, 'output');
if (node === 'director_v2' || node === 'director' || node === 'pa_v1') {
pulseNode(node); flashEdge(node, 'thinker');
} else {
pulseNode(node || 'thinker'); flashEdge('thinker', 'output');
}
break;
case 'routed': pulseNode(_nodeNameToId['pa_v1'] || 'pa'); break;
case 'routed': pulseNode('pa'); break;
case 'reflex_path': pulseNode('input'); flashEdge('input', 'output'); break;
case 'streaming': if (graphId === 'output') pulseNode('output'); break;
case 'streaming': if (node === 'output') pulseNode('output'); break;
case 'controls': case 'machine_created': case 'machine_transition':
pulseNode('ui'); break;
case 'updated': pulseNode('memorizer'); flashEdge('output', 'memorizer'); break;
case 'tool_call': pulseNode(graphId); break;
case 'tool_result': pulseNode(graphId); break;
case 'thinking': pulseNode(graphId); break;
case 'planned': pulseNode(graphId); break;
case 'tool_call': pulseNode(node || 'thinker'); break;
case 'tool_result':
if (cy.getElementById('interpreter').length) pulseNode('interpreter'); break;
case 'thinking': if (node) pulseNode(node); break;
case 'tick': pulseNode('sensor'); break;
}
}); // end _enqueue
}
export function startPhysics() {

View File

@ -4,7 +4,6 @@ import { initAuth, authToken, startLogin } from './auth.js';
import { initTrace, addTrace, clearTrace } from './trace.js';
import { initChat, clearChat } from './chat.js';
import { clearDashboard } from './dashboard.js';
import { clearNodes } from './awareness.js';
import { initGraph } from './graph.js';
import { connect } from './ws.js';
@ -13,13 +12,10 @@ window.addEventListener('load', async () => {
initTrace();
initChat();
await initGraph();
await initAuth(() => {
connect();
loadGraphSwitcher();
});
await initAuth(() => connect());
});
// Clear session
// Clear session button
window.clearSession = async () => {
try {
const headers = { 'Content-Type': 'application/json' };
@ -28,63 +24,11 @@ window.clearSession = async () => {
clearChat();
clearTrace();
clearDashboard();
clearNodes();
addTrace('runtime', 'cleared', 'session reset');
} catch (e) {
addTrace('runtime', 'error', 'clear failed: ' + e);
}
};
// Graph switcher — loads available graphs and shows buttons in top bar
/**
 * Populate #graph-switcher with one button per available graph and highlight the
 * active one. Graph names and descriptions come from the server, so buttons are
 * built with DOM APIs (textContent / title / onclick closure) rather than
 * string-built HTML with inline handlers; a quote or tag in a name therefore
 * cannot inject markup or script.
 * Failures are logged to the console and leave the switcher unchanged.
 */
async function loadGraphSwitcher() {
  const container = document.getElementById('graph-switcher');
  if (!container) { console.error('[main] no #graph-switcher'); return; }
  try {
    const headers = {};
    if (authToken) headers['Authorization'] = 'Bearer ' + authToken;
    const r = await fetch('/api/graph/list', { headers });
    if (!r.ok) { console.error('[main] graph/list failed:', r.status); return; }
    const data = await r.json();
    const graphs = data.graphs || data || [];
    if (!Array.isArray(graphs)) {
      console.error('[main] graph/list returned no graph array');
      return;
    }
    console.log('[main] graphs:', graphs.length);

    // Get current active graph (best effort: without it no button is highlighted)
    let activeGraph = '';
    try {
      const ar = await fetch('/api/graph/active', { headers });
      if (ar.ok) {
        const ag = await ar.json();
        activeGraph = ag.name || '';
      }
    } catch (e) {
      console.warn('[main] graph/active failed:', e);
    }

    container.innerHTML = '';
    for (const g of graphs) {
      const btn = document.createElement('button');
      btn.className = 'btn-graph' + (g.name === activeGraph ? ' active' : '');
      btn.title = g.description || '';
      btn.textContent = g.name;
      btn.onclick = () => window.switchGraph(g.name);
      container.appendChild(btn);
    }
  } catch (e) {
    console.error('[main] loading graph switcher failed:', e);
  }
}
/**
 * Ask the server to switch to the named graph, then reset the UI (chat, trace,
 * dashboard, node cards) and reload the graph view and switcher.
 * A non-2xx response is treated as a failure: the UI is left as it is and the
 * error is written to the trace, instead of wiping the session view and
 * reporting a switch that never happened.
 * @param {string} name graph name to activate
 */
window.switchGraph = async (name) => {
  try {
    const headers = { 'Content-Type': 'application/json' };
    if (authToken) headers['Authorization'] = 'Bearer ' + authToken;
    const resp = await fetch('/api/graph/switch', {
      method: 'POST', headers,
      body: JSON.stringify({ name }),
    });
    if (!resp.ok) throw new Error('HTTP ' + resp.status);
    addTrace('runtime', 'graph_switch', name);
    clearChat();
    clearTrace();
    clearDashboard();
    clearNodes();
    addTrace('runtime', 'switched', `graph: ${name}`);
    await initGraph();
    loadGraphSwitcher();
  } catch (e) {
    addTrace('runtime', 'error', 'switch failed: ' + e);
  }
};
// Login
// Login button
window.startLogin = startLogin;

View File

@ -2,10 +2,10 @@
import { authToken, isAuthFailed, setAuthFailed, showLogin } from './auth.js';
import { addTrace } from './trace.js';
import { addMsg, handleDelta, handleDone, setWs as setChatWs } from './chat.js';
import { dockControls, dockArtifacts, setWs as setDashWs } from './dashboard.js';
import { handleDelta, handleDone, setWs as setChatWs } from './chat.js';
import { dockControls, setWs as setDashWs } from './dashboard.js';
import { graphAnimate } from './graph.js';
import { updateMeter, updateNodeFromHud, updateAwarenessState, updateAwarenessSensors } from './awareness.js';
import { updateMeter, updateAwarenessState, updateAwarenessSensors } from './awareness.js';
import { updateTestStatus } from './tests.js';
import { truncate, esc } from './util.js';
@ -30,14 +30,12 @@ export function connect() {
setChatWs(ws);
setDashWs(ws);
connectDebugSockets();
restoreHistory();
};
ws.onerror = () => {};
ws.onclose = (e) => {
// 4001 = explicit auth rejection from server
if (e.code === 4001) {
if (e.code === 4001 || e.code === 1006) {
setAuthFailed(true);
localStorage.removeItem('cog_token');
localStorage.removeItem('cog_access_token');
@ -46,10 +44,9 @@ export function connect() {
showLogin();
return;
}
// 1006 = abnormal close (deploy, network), just reconnect
document.getElementById('status').textContent = 'reconnecting...';
document.getElementById('status').style.color = '#f59e0b';
addTrace('runtime', 'disconnected', `code ${e.code}, reconnecting...`);
document.getElementById('status').textContent = 'disconnected';
document.getElementById('status').style.color = '#666';
addTrace('runtime', 'disconnected', 'ws closed');
setTimeout(connect, 2000);
};
@ -61,8 +58,6 @@ export function connect() {
handleDelta(data.content);
} else if (data.type === 'done') {
handleDone();
} else if (data.type === 'artifacts') {
dockArtifacts(data.artifacts);
} else if (data.type === 'controls') {
dockControls(data.controls);
} else if (data.type === 'cleared') {
@ -71,31 +66,6 @@ export function connect() {
};
}
/**
 * Refill an empty chat with the last 20 messages from /api/history.
 * Assistant messages are rendered as markdown, everything else as plain text.
 * Skipped when the request fails, returns no messages, or the chat already has
 * content. Best effort: errors are logged to the console and never thrown.
 */
async function restoreHistory() {
  try {
    const headers = {};
    if (authToken) headers['Authorization'] = 'Bearer ' + authToken;
    const r = await fetch('/api/history?last=20', { headers });
    if (!r.ok) return;
    const data = await r.json();
    const messages = data.messages || [];
    if (!messages.length) return;
    // Only restore if chat is empty (fresh load)
    if (document.getElementById('messages').children.length > 0) return;

    // Load the markdown renderer once, and only if an assistant message needs it,
    // instead of awaiting the dynamic import on every loop iteration.
    let renderMarkdown = null;
    if (messages.some((m) => m.role === 'assistant')) {
      ({ renderMarkdown } = await import('./util.js'));
    }

    for (const msg of messages) {
      const el = addMsg(msg.role, '');
      if (msg.role === 'assistant') {
        el.innerHTML = renderMarkdown(msg.content || '');
      } else {
        el.textContent = msg.content || '';
      }
    }
    addTrace('runtime', 'restored', `${messages.length} messages`);
  } catch (e) {
    console.warn('[ws] history restore failed:', e);
  }
}
function connectDebugSockets() {
const proto = location.protocol === 'https:' ? 'wss:' : 'ws:';
const base = proto + '//' + location.host;
@ -153,7 +123,6 @@ function handleHud(data) {
const event = data.event || '';
graphAnimate(event, node);
updateNodeFromHud(node, event, data);
if (event === 'context') {
const count = (data.messages || []).length;

View File

@ -10,16 +10,10 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0;
#test-status .ts-pass { color: #22c55e; }
#test-status .ts-fail { color: #ef4444; }
@keyframes pulse-text { 0%,100% { opacity: 1; } 50% { opacity: 0.5; } }
.btn-top { padding: 0.2rem 0.6rem; font-size: 0.7rem; background: #333; }
.btn-top:hover { background: #ef4444; }
#graph-switcher { display: flex; gap: 3px; }
.btn-graph { padding: 0.2rem 0.5rem; font-size: 0.65rem; font-family: monospace; background: #1a1a1a; color: #888; border: 1px solid #333; border-radius: 3px; cursor: pointer; }
.btn-graph:hover { color: #fff; border-color: #2563eb; }
.btn-graph.active { color: #22c55e; border-color: #22c55e; background: #0a1e14; }
/* === Two-row layout === */
/* Middle row: workspace | node detail | graph */
#middle-row { display: grid; grid-template-columns: 1fr 300px 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; }
#middle-row { display: grid; grid-template-columns: 1fr 200px 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; }
/* Bottom row: chat | awareness | trace */
#bottom-row { display: grid; grid-template-columns: 1fr 1fr 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; }
@ -42,19 +36,12 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0;
/* Node detail / metrics */
.detail-panel { display: flex; flex-direction: column; }
#node-metrics { flex: 1; overflow-y: auto; padding: 0.3rem; display: flex; flex-direction: column; gap: 2px; }
.node-card { background: #111; border-radius: 3px; padding: 0.25rem 0.4rem; border-left: 2px solid #333; }
.node-card.nm-active { border-left-color: #f59e0b; background: #1a1408; }
.node-card.nm-streaming { border-left-color: #22c55e; background: #0a1e14; }
.nc-header { display: flex; align-items: center; gap: 0.3rem; }
.nc-name { font-size: 0.65rem; font-weight: 700; text-transform: uppercase; color: #e0e0e0; min-width: 3rem; }
.nc-model { font-size: 0.55rem; color: #666; font-family: monospace; }
.nc-tokens { font-size: 0.55rem; color: #555; font-family: monospace; margin-left: auto; }
.nc-bar { height: 3px; background: #1a1a1a; border-radius: 2px; overflow: hidden; margin: 2px 0; }
.nc-fill { height: 100%; border-radius: 2px; background: #333; transition: width 0.3s; }
.nc-status { display: flex; gap: 0.3rem; align-items: baseline; }
.nc-event { font-size: 0.55rem; color: #888; font-family: monospace; }
.nc-detail { font-size: 0.55rem; color: #666; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
#node-metrics { flex: 1; overflow-y: auto; padding: 0.3rem; display: flex; flex-direction: column; gap: 1px; }
.node-meter { display: flex; align-items: center; gap: 0.3rem; padding: 0.2rem 0.4rem; background: #111; border-radius: 2px; }
.nm-label { font-size: 0.6rem; font-weight: 700; text-transform: uppercase; letter-spacing: 0.03em; min-width: 3.5rem; color: #888; }
.nm-bar { flex: 1; height: 5px; background: #1a1a1a; border-radius: 3px; overflow: hidden; }
.nm-fill { height: 100%; width: 0%; border-radius: 3px; transition: width 0.3s; background: #333; }
.nm-text { font-size: 0.55rem; color: #555; min-width: 3rem; text-align: right; font-family: monospace; }
/* Graph panel */
.graph-panel { display: flex; flex-direction: column; }
@ -127,10 +114,6 @@ button:hover { background: #1d4ed8; }
.aw-row { display: flex; justify-content: space-between; padding: 0.08rem 0; }
.aw-key { color: #888; font-size: 0.65rem; }
.aw-val { color: #e0e0e0; font-size: 0.7rem; font-weight: 500; }
.aw-exp-conv { color: #4caf50; }
.aw-exp-deleg { color: #ff9800; }
.aw-exp-wait { color: #42a5f5; }
.aw-exp-obs { color: #9e9e9e; }
/* UI Controls (workspace) */
.controls-container { padding: 0.3rem 0; display: flex; flex-wrap: wrap; gap: 0.3rem; align-items: flex-start; }
@ -147,51 +130,6 @@ button:hover { background: #1d4ed8; }
.cd-label { color: #888; }
.cd-value { color: #e0e0e0; margin-left: 0.5rem; }
/* Workspace cards */
.ws-card { background: #111; border: 1px solid #222; border-radius: 0.4rem; padding: 0.5rem 0.6rem; width: 100%; }
.ws-card-clickable { cursor: pointer; }
.ws-card-clickable:hover { border-color: #2563eb; background: #0a1628; }
.ws-card-title { font-size: 0.85rem; font-weight: 700; color: #e0e0e0; }
.ws-card-subtitle { font-size: 0.7rem; color: #888; margin-top: 0.1rem; }
.ws-card-fields { margin-top: 0.4rem; display: flex; flex-direction: column; gap: 0.15rem; }
.ws-card-field { display: flex; justify-content: space-between; font-size: 0.75rem; padding: 0.1rem 0; }
.ws-card-key { color: #888; }
.ws-card-val { color: #e0e0e0; font-weight: 500; }
.ws-card-link { color: #60a5fa; cursor: pointer; font-weight: 500; }
.ws-card-link:hover { text-decoration: underline; }
.ws-card-actions { margin-top: 0.4rem; display: flex; gap: 0.3rem; flex-wrap: wrap; }
.ws-card-btn { font-size: 0.7rem; padding: 0.2rem 0.5rem; }
.ws-list { display: flex; flex-direction: column; gap: 0.3rem; width: 100%; }
.ws-list-title { font-size: 0.75rem; font-weight: 700; color: #888; text-transform: uppercase; letter-spacing: 0.03em; margin-bottom: 0.2rem; }
.ws-card-nested { margin: 0; border-color: #1a1a2e; }
/* Artifact system */
.artifacts-container { padding: 0.3rem 0; display: flex; flex-direction: column; gap: 0.4rem; }
.ws-artifact { width: 100%; }
.ws-artifact-entity { background: #111; border: 1px solid #222; border-radius: 0.4rem; padding: 0.5rem 0.6rem; }
.ws-artifact-data_table { }
.ws-artifact-action_bar { display: flex; flex-wrap: wrap; gap: 0.3rem; }
.ws-artifact-status { padding: 0.25rem 0.4rem; font-size: 0.75rem; display: flex; align-items: center; gap: 0.4rem; }
.ws-artifact-header { font-size: 0.75rem; font-weight: 600; color: #888; margin-bottom: 0.2rem; }
.ws-artifact-fallback { font-size: 0.7rem; color: #666; font-family: monospace; white-space: pre-wrap; }
/* Document page artifact */
.ws-artifact-document_page { background: #111; border: 1px solid #222; border-radius: 0.4rem; padding: 0.8rem 1rem; }
.ws-doc-title { font-size: 1rem; font-weight: 700; color: #e0e0e0; margin-bottom: 0.6rem; border-bottom: 1px solid #333; padding-bottom: 0.4rem; }
.ws-doc-section { margin-bottom: 0.5rem; }
.ws-doc-heading { font-size: 0.8rem; font-weight: 700; color: #a78bfa; margin-bottom: 0.2rem; }
.ws-doc-content { font-size: 0.75rem; color: #ccc; line-height: 1.5; }
.ws-doc-content ul, .ws-doc-content ol { margin: 0.2rem 0; padding-left: 1.2rem; }
/* Machine artifact */
.ws-artifact-machine { background: #111; border: 1px solid #2563eb33; border-radius: 0.4rem; padding: 0.5rem 0.6rem; }
.ws-machine-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.3rem; }
.ws-machine-name { font-size: 0.75rem; font-weight: 600; color: #a78bfa; }
.ws-machine-state { font-size: 0.7rem; color: #60a5fa; background: #1e3a5f; padding: 0.1rem 0.4rem; border-radius: 0.2rem; }
.ws-machine-content { font-size: 0.75rem; color: #ccc; padding: 0.1rem 0; }
.ws-machine-data { display: flex; flex-wrap: wrap; gap: 0.3rem; margin-top: 0.2rem; }
.ws-machine-datum { font-size: 0.65rem; color: #888; background: #1a1a2e; padding: 0.1rem 0.3rem; border-radius: 0.2rem; }
/* Login overlay */
#login-overlay { position: fixed; inset: 0; background: rgba(0,0,0,0.85); display: flex; align-items: center; justify-content: center; z-index: 1000; }
.login-card { background: #1a1a1a; padding: 2rem; border-radius: 0.6rem; text-align: center; }

View File

@ -1,33 +0,0 @@
# Artifact System
Tests that the artifact rendering pipeline works end-to-end.
Expert produces data → UINode converts to artifacts → frontend renders.
## Setup
- clear history
## Steps
### 1. Query produces data_table artifact
- send: show me 3 customers in a table
- expect_trace: has tool_call
- expect_response: length > 10
### 2. Entity detail via card
- send: show me details for customer 1
- expect_trace: has tool_call
- expect_response: length > 10
### 3. Action bar via buttons
- send: create two buttons on my dashboard: Refresh and Export
- expect_actions: length >= 2
- expect_actions: any action contains "refresh" or "Refresh"
### 4. Machine artifact
- send: create a machine called "flow" with initial state "ready" and a state called "done"
- expect_trace: has machine_created
### 5. Query after buttons survive
- send: how many customers are there?
- expect_response: length > 5
- expect_actions: any action contains "refresh" or "Refresh"

View File

@ -1,46 +0,0 @@
# Domain Context
Tests that the expert understands the Eras business domain:
Heizkostenabrechnung, Kunde→Objekt→Nutzeinheit→Geraet hierarchy,
and can formulate correct JOINs without guessing column names.
## Setup
- clear history
## Steps
### 1. Expert knows the hierarchy
- send: wie viele Objekte haben Kunden im Durchschnitt?
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "doesn't exist"
- expect_response: length > 20
### 2. Expert can JOIN kunden and objekte
- send: zeig mir die Top 5 Kunden mit den meisten Objekten
- expect_trace: has tool_call
- expect_response: not contains "Error" or "error" or "Unknown column"
- expect_response: length > 20
### 3. Expert understands Nutzeinheiten belong to Objekte
- send: how many Nutzeinheiten does the system have total?
- expect_trace: has tool_call
- expect_response: not contains "Error" or "error" or "Unknown column"
- expect_response: length > 10
### 4. Expert understands Geraete belong to Nutzeinheiten
- send: which Objekt has the most Geraete?
- expect_trace: has tool_call
- expect_response: not contains "Error" or "error" or "Unknown column"
- expect_response: length > 20
### 5. Multi-hop query through hierarchy
- send: zeig alle Nutzer in Objekten von Kunde mit Jaeger im Namen
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "doesn't exist"
- expect_response: contains "Jaeger" or "jaeger"
### 6. PA formulates good job descriptions
- send: gib mir eine Uebersicht ueber Kunde 2
- expect_trace: has routed
- expect_response: length > 20
- expect_response: not contains "clarify" or "specify" or "what kind"

View File

@ -1,64 +0,0 @@
# Eras Domain Mastery
Tests that the expert knows the schema cold — no DESCRIBE at runtime, no SQL errors,
domain-correct responses. The expert is a Heizkostenabrechnung specialist, not a SQL explorer.
## Setup
- clear history
## Steps
### 1. Customer overview
- send: zeig mir die ersten 5 Kunden
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 20
### 2. Objekte per Kunde (junction table)
- send: welcher Kunde hat die meisten Objekte?
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 20
### 3. Nutzeinheiten in an Objekt
- send: wie viele Nutzeinheiten hat Objekt 4?
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 5
### 4. Geraete count per Objekt
- send: welches Objekt hat die meisten Geraete?
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 20
### 5. Full hierarchy traversal (4 tables)
- send: zeig mir alle Nutzer von Kunde 2
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 10
### 6. Address lookup via junction
- send: was ist die Adresse von Objekt 4?
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 10
### 7. Verbrauchsdaten query
- send: zeig mir die letzten 5 Verbrauchswerte von Geraet 100
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 10
### 8. Domain language response (not SQL dump)
- send: gib mir eine Zusammenfassung von Kunde 103
- expect_trace: has tool_call
- expect_response: not contains "SELECT" or "JOIN" or "FROM"
- expect_response: length > 30
### 9. Expert does NOT describe at runtime
- send: wie viele Geraete hat Kunde 63?
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: not contains "DESCRIBE" or "describe"
- expect_response: length > 5

View File

@ -1,50 +0,0 @@
# Expectation Tracking
Tests that the memorizer tracks user_expectation and that it influences PA/Output behavior.
Exercises machine features (update_machine, transition_machine) alongside expectation transitions.
## Setup
- clear history
## Steps
### 1. Greeting sets conversational
- send: hi there!
- expect_response: length > 2
- expect_state: user_expectation is "conversational"
### 2. Create a wizard machine
- send: create a machine called "project" with states: planning (initial) and executing
- expect_trace: has machine_created
### 3. Delegate a task
- send: build me a summary report of the top 5 customers by device count
- expect_response: length > 20
- expect_state: user_expectation is "delegated" or "observing"
### 4. Ask about wizard (status check stays in flow)
- send: what state is my project machine in?
- expect_response: contains "planning" or "project"
- expect_state: user_expectation is "conversational" or "delegated"
### 5. Store data on machine
- send: use update_machine to store status=in_progress on the project machine
- expect_response: length > 5
### 6. Transition machine
- send: use transition_machine to move project to executing state
- expect_response: length > 5
### 7. Verify machine state and data
- send: what is the current state and data of the project machine?
- expect_response: contains "executing" or "in_progress"
### 8. Short nudge triggers waiting_input
- send: und?
- expect_response: length > 5
- expect_state: user_expectation is "waiting_input" or "conversational"
### 9. Quick thanks (observing)
- send: ok danke
- expect_response: length > 0
- expect_state: user_expectation is "observing" or "observational" or "conversational"

View File

@ -1,33 +0,0 @@
# Expert Recovery
Tests that the expert recovers from SQL errors by retrying with corrected queries,
not by reporting the error and stopping.
## Setup
- clear history
## Steps
### 1. Expert recovers from column error silently
- send: zeig mir alle Geraete von Objekt 4 mit Bezeichnung und Einbaudatum
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 20
### 2. Multi-table query with potential errors
- send: zeig mir alle Nutzer und ihre Geraete fuer Kunde 2
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
- expect_response: length > 20
### 3. Expert does not give up on first failure
- send: zeig mir Verbrauchswerte fuer Geraet 50 im letzten Monat
- expect_trace: has tool_call
- expect_response: not contains "I need assistance" or "developer" or "schema issue"
- expect_response: length > 10
### 4. Expert retries on unmapped table (abrechnungsinformationen)
- send: zeig mir die letzten 3 Abrechnungsinformationen
- expect_trace: has tool_call
- expect_response: not contains "Unknown column" or "1054"
- expect_response: length > 10

View File

@ -1,41 +0,0 @@
# Machine State → PA Context
Tests that the PA reads machine state when routing, and that experts can write back to machines.
Validates: enriched machine summary, update_machine, transition_machine.
## Setup
- clear history
## Steps
### 1. Create a machine
- send: create a navigation machine called "wizard" with initial state "start" and a second state called "details"
- expect_trace: has machine_created
### 2. PA sees machine in context
- send: what machines are active on my dashboard?
- expect_response: contains "wizard" or "start"
### 3. Expert stores data on machine
- send: use update_machine to store region=Bayern on the wizard machine
- expect_response: contains "Bayern" or "region" or "stored" or "updated"
### 4. PA sees stored data
- send: what data is stored in my wizard machine?
- expect_response: contains "Bayern" or "region"
### 5. Expert transitions machine to details
- send: use transition_machine to move wizard to details state
- expect_response: length > 5
### 6. PA sees updated state
- send: what state is the wizard in now?
- expect_response: contains "details"
### 7. Expert transitions back
- send: use transition_machine to move wizard back to start
- expect_response: length > 5
### 8. Final state check
- send: tell me the current wizard state and stored data
- expect_response: contains "start"

View File

@ -1,19 +0,0 @@
# PA Retry on Expert Failure
Tests that when the expert fails, the PA reformulates and retries with a different approach.
## Setup
- clear history
## Steps
### 1. Complex analytical query that may need retry
- send: Finde KWZ-Geraete mit verdaechtigen Verbrauchsspruengen - also wo der Verbrauch zwischen zwei Ablesungen stark ansteigt
- expect_response: length > 20
### 2. Verify results contain device data
- expect_response: contains "Gera" or "gera" or "KWZ" or "kwz" or "Verbrauch" or "device"
### 3. Follow up with details
- send: zeig mir die Verbraeuche von einem dieser Geraete
- expect_response: length > 10

View File

@ -1,7 +1,7 @@
{
"timestamp": "2026-03-30 00:02:55",
"timestamp": "2026-03-29 06:04:47",
"testcases": {
"Artifact System": [
"S3* Audit Corrections": [
{
"step": "Setup",
"check": "clear",
@ -9,355 +9,93 @@
"detail": "cleared"
},
{
"step": "Query produces data_table artifact",
"check": "send: show me 3 customers in a table",
"step": "Tool calls produce results (baseline)",
"check": "send: create two buttons: Alpha and Beta",
"status": "PASS",
"detail": "response: The database contains information for three customers: Kathrin Jager, Leon Schre"
"detail": "response: 👍 Okay, I've created buttons labeled \"Alpha\" and \"Beta\".\n"
},
{
"step": "Query produces data_table artifact",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Query produces data_table artifact",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 138 > 10"
},
{
"step": "Entity detail via card",
"check": "send: show me details for customer 1",
"status": "PASS",
"detail": "response: ```tool_code\nquery_db({\"query\":\"SELECT * FROM customers WHERE customer_id = 1\"})"
},
{
"step": "Entity detail via card",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Entity detail via card",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 84 > 10"
},
{
"step": "Action bar via buttons",
"check": "send: create two buttons on my dashboard: Refr",
"status": "PASS",
"detail": "response: I have added the 'Refresh' and 'Export' buttons to your dashboard. These buttons"
},
{
"step": "Action bar via buttons",
"check": "actions: length >= 2",
"status": "PASS",
"detail": "2 actions >= 2"
},
{
"step": "Action bar via buttons",
"check": "actions: any action contains \"refresh\" or \"Refresh\"",
"status": "PASS",
"detail": "found 'refresh' in actions"
},
{
"step": "Machine artifact",
"check": "send: create a machine called \"flow\" with init",
"status": "PASS",
"detail": "response: OK, I've created a new interactive machine called 'flow' with the initial state "
},
{
"step": "Machine artifact",
"check": "trace: has machine_created",
"status": "PASS",
"detail": "found event 'machine_created'"
},
{
"step": "Query after buttons survive",
"check": "send: how many customers are there?",
"status": "PASS",
"detail": "response: There are 693 customers in the database.\n"
},
{
"step": "Query after buttons survive",
"check": "response: length > 5",
"status": "PASS",
"detail": "length 41 > 5"
},
{
"step": "Query after buttons survive",
"check": "actions: any action contains \"refresh\" or \"Refresh\"",
"status": "PASS",
"detail": "found 'refresh' in actions"
}
],
"Fast v4": [
{
"step": "Setup",
"check": "clear",
"status": "PASS",
"detail": "cleared"
},
{
"step": "Reflex",
"check": "send: hi!",
"status": "PASS",
"detail": "response: Hey Nico! 👋 How can I help you today?\n"
},
{
"step": "Reflex",
"check": "response: length > 2",
"status": "PASS",
"detail": "length 38 > 2"
},
{
"step": "PA routes to expert",
"check": "send: show me 3 customers",
"status": "PASS",
"detail": "response: Alright, I've fetched 3 customer records for you. You can see the ID, Name detai"
},
{
"step": "PA routes to expert",
"check": "trace: has routed",
"status": "PASS",
"detail": "found event 'routed'"
},
{
"step": "PA routes to expert",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "PA routes to expert",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 181 > 10"
},
{
"step": "German query",
"check": "send: Zeig mir alle Tabellen in der Datenbank",
"status": "PASS",
"detail": "response: Okay, ich habe eine Liste aller Tabellen in der \"eras2_production\" Datenbank abg"
},
{
"step": "German query",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "German query",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 303 > 10"
},
{
"step": "Schema discovery",
"check": "send: describe the kunden table",
"status": "PASS",
"detail": "response: The `kunden` table stores customer information, including names, customer number"
},
{
"step": "Schema discovery",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Schema discovery",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 391 > 10"
},
{
"step": "Count query (cached schema)",
"check": "send: how many customers are there?",
"status": "PASS",
"detail": "response: There are 693 customers in the database.\n"
},
{
"step": "Count query (cached schema)",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Count query (cached schema)",
"check": "response: length > 5",
"status": "PASS",
"detail": "length 41 > 5"
},
{
"step": "Complex query",
"check": "send: which customers have the most devices?",
"status": "PASS",
"detail": "response: The query results list the top 10 customers with the most devices. Anne Bürger h"
},
{
"step": "Complex query",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Complex query",
"check": "response: length > 20",
"status": "PASS",
"detail": "length 166 > 20"
},
{
"step": "Error recovery",
"check": "send: SELECT * FROM nichtexistiert",
"status": "PASS",
"detail": "response: Oops! It seems like the table `nichtexistiert` doesn't exist in the database. 🤔 "
},
{
"step": "Error recovery",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Error recovery",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 396 > 10"
},
{
"step": "Memorizer",
"check": "send: my name is Nico",
"status": "PASS",
"detail": "response: Thanks, Nico! I'll remember that. Do you have any other questions? 😊\n"
},
{
"step": "Memorizer",
"check": "state: facts any contains \"Nico\"",
"status": "PASS",
"detail": "found 'Nico' in facts"
},
{
"step": "Language switch",
"check": "send: Hallo wie gehts?",
"status": "PASS",
"detail": "response: Mir geht es gut, danke der Nachfrage, Nico! Und selbst? Gibt es etwas, bei dem i"
},
{
"step": "Language switch",
"check": "state: language is \"de\" or \"mixed\"",
"status": "PASS",
"detail": "language=mixed"
},
{
"step": "Bye",
"check": "send: ok bye",
"status": "PASS",
"detail": "response: Bye Nico! 👋 If you need anything else, just let me know. 😊\n"
},
{
"step": "Bye",
"check": "response: length > 2",
"status": "PASS",
"detail": "length 59 > 2"
}
],
"Dashboard Integration": [
{
"step": "Setup",
"check": "clear",
"status": "PASS",
"detail": "cleared"
},
{
"step": "Expert creates buttons",
"check": "send: create two buttons on my dashboard: Repo",
"status": "PASS",
"detail": "response: I have added 'Report' and 'Export' buttons to your dashboard.\n\n(UI buttons shown"
},
{
"step": "Expert creates buttons",
"check": "actions: length >= 2",
"status": "PASS",
"detail": "3 actions >= 2"
},
{
"step": "Expert creates buttons",
"check": "actions: any action contains \"report\" or \"Report\"",
"status": "PASS",
"detail": "found 'report' in actions"
},
{
"step": "Buttons survive a query",
"check": "send: how many customers are there?",
"status": "PASS",
"detail": "response: I'm running a query to count all customer IDs. One moment...\n"
},
{
"step": "Buttons survive a query",
"check": "response: length > 5",
"status": "PASS",
"detail": "length 61 > 5"
},
{
"step": "Buttons survive a query",
"check": "actions: any action contains \"report\" or \"Report\"",
"status": "PASS",
"detail": "found 'report' in actions"
},
{
"step": "Expert creates a machine",
"check": "send: create a navigation machine called \"work",
"status": "PASS",
"detail": "response: I've created the 'workflow' machine with 'start' and 'step2' states. The 'start'"
},
{
"step": "Expert creates a machine",
"check": "trace: has tool_call create_machine",
"status": "PASS",
"detail": "found create_machine via machine_created event"
},
{
"step": "Expert shows data table",
"check": "send: show me 5 customers in a table",
"status": "PASS",
"detail": "response: Here are five customer entries with their IDs, names, object count, and status:\n"
},
{
"step": "Expert shows data table",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Expert shows data table",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 118 > 10"
},
{
"step": "Expert replaces buttons",
"check": "send: remove all buttons and create one button",
"status": "PASS",
"detail": "response: I have removed the existing 'Report' and 'Export' buttons from the dashboard and"
},
{
"step": "Expert replaces buttons",
"step": "Tool calls produce results (baseline)",
"check": "actions: length >= 1",
"status": "PASS",
"detail": "2 actions >= 1"
},
{
"step": "Expert replaces buttons",
"check": "actions: any action contains \"reset\" or \"Reset\"",
"step": "Tool calls produce results (baseline)",
"check": "actions: any action contains \"alpha\" or \"Alpha\"",
"status": "PASS",
"detail": "found 'reset' in actions"
"detail": "found 'alpha' in actions"
},
{
"step": "Dashboard mismatch triggers re-emit",
"check": "send: I see nothing on my dashboard, fix it",
"status": "PASS",
"detail": "response: 👍 Done — Alpha and Beta buttons are now live on your dashboard. They should appe"
},
{
"step": "Dashboard mismatch triggers re-emit",
"check": "response: not contains \"sorry\" or \"apologize\"",
"status": "PASS",
"detail": "none of ['sorry', 'apologize'] found (as expected)"
},
{
"step": "Dashboard mismatch triggers re-emit",
"check": "actions: length >= 1",
"status": "PASS",
"detail": "2 actions >= 1"
},
{
"step": "DB error triggers retry with corrected SQL",
"check": "send: SELECT * FROM NichtExistent LIMIT 5",
"status": "PASS",
"detail": "response: Ah, it seems like the table `NichtExistent` does not exist. Double-check the tab"
},
{
"step": "DB error triggers retry with corrected SQL",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "DB error triggers retry with corrected SQL",
"check": "response: not contains \"1146\"",
"status": "PASS",
"detail": "none of ['1146'] found (as expected)"
},
{
"step": "DB error triggers retry with corrected SQL",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 163 > 10"
},
{
"step": "Complex request gets Director plan",
"check": "send: investigate which customers have the mos",
"status": "PASS",
"detail": "response: Okay, I'll look into which customers have the most devices. This might take a mo"
},
{
"step": "Complex request gets Director plan",
"check": "trace: has director_plan",
"status": "FAIL",
"detail": "no 'director_plan' event in trace"
},
{
"step": "Complex request gets Director plan",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Complex request gets Director plan",
"check": "response: length > 20",
"status": "PASS",
"detail": "length 86 > 20"
}
]
},
"summary": {
"passed": 58,
"failed": 0
"passed": 14,
"failed": 1
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,40 +0,0 @@
# Workspace Components
Tests that the expert emits structured UI components (cards, lists, tables)
instead of dumping text or raw SQL. The workspace should show domain-aware displays.
## Setup
- clear history
## Steps
### 1. Detail card for a single entity
- send: zeig mir Details zu Kunde 2
- expect_trace: has tool_call
- expect_actions: has card
- expect_response: not contains "SELECT" or "JOIN"
- expect_response: length > 10
### 2. List of items with navigation
- send: zeig mir alle Objekte von Kunde 2
- expect_trace: has tool_call
- expect_actions: has card or has table
- expect_response: length > 10
### 3. Table for tabular data
- send: zeig mir die Geraete von Objekt 4
- expect_trace: has tool_call
- expect_actions: has table
- expect_response: length > 10
### 4. Card with actions (drill-down buttons)
- send: zeig mir Auftrag 21479
- expect_trace: has tool_call
- expect_actions: length >= 1
- expect_response: length > 10
### 5. Summary card with key metrics
- send: gib mir eine Zusammenfassung von Objekt 4
- expect_trace: has tool_call
- expect_actions: has card
- expect_response: length > 20