Compare commits
No commits in common. "925fff731f1b6b618c7573551f9526b933231903" and "3a9c2795cfa7556203c49dedaaeadca8a2271c06" have entirely different histories.
925fff731f
...
3a9c2795cf
35
agent/api.py
35
agent/api.py
@ -153,29 +153,12 @@ def register_routes(app):
|
|||||||
msg = json.loads(data)
|
msg = json.loads(data)
|
||||||
# Always use current runtime (may change after graph switch)
|
# Always use current runtime (may change after graph switch)
|
||||||
rt = _active_runtime or runtime
|
rt = _active_runtime or runtime
|
||||||
try:
|
|
||||||
if msg.get("type") == "action":
|
if msg.get("type") == "action":
|
||||||
action = msg.get("action", "unknown")
|
await rt.handle_action(msg.get("action", "unknown"), msg.get("data"))
|
||||||
data_payload = msg.get("data")
|
|
||||||
if hasattr(rt, 'use_frames') and rt.use_frames:
|
|
||||||
# Frame engine handles actions as ACTION: prefix messages
|
|
||||||
action_text = f"ACTION:{action}"
|
|
||||||
if data_payload:
|
|
||||||
action_text += f"|data:{json.dumps(data_payload)}"
|
|
||||||
await rt.handle_message(action_text)
|
|
||||||
else:
|
|
||||||
await rt.handle_action(action, data_payload)
|
|
||||||
elif msg.get("type") == "cancel_process":
|
elif msg.get("type") == "cancel_process":
|
||||||
rt.process_manager.cancel(msg.get("pid", 0))
|
rt.process_manager.cancel(msg.get("pid", 0))
|
||||||
else:
|
else:
|
||||||
await rt.handle_message(msg.get("text", ""), dashboard=msg.get("dashboard"))
|
await rt.handle_message(msg.get("text", ""), dashboard=msg.get("dashboard"))
|
||||||
except Exception as e:
|
|
||||||
import traceback
|
|
||||||
log.error(f"[ws] handler error: {e}\n{traceback.format_exc()}")
|
|
||||||
try:
|
|
||||||
await ws.send_text(json.dumps({"type": "hud", "node": "runtime", "event": "error", "detail": str(e)[:200]}))
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
except WebSocketDisconnect:
|
except WebSocketDisconnect:
|
||||||
if _active_runtime:
|
if _active_runtime:
|
||||||
_active_runtime.detach_ws()
|
_active_runtime.detach_ws()
|
||||||
@ -364,7 +347,6 @@ def register_routes(app):
|
|||||||
"language": "en",
|
"language": "en",
|
||||||
"style_hint": "casual, technical",
|
"style_hint": "casual, technical",
|
||||||
"facts": [],
|
"facts": [],
|
||||||
"user_expectation": "conversational",
|
|
||||||
}
|
}
|
||||||
_pipeline_result = {"status": "idle", "id": "", "stage": "cleared"}
|
_pipeline_result = {"status": "idle", "id": "", "stage": "cleared"}
|
||||||
# Notify frontend via WS
|
# Notify frontend via WS
|
||||||
@ -398,26 +380,11 @@ def register_routes(app):
|
|||||||
from .engine import load_graph, get_graph_for_cytoscape
|
from .engine import load_graph, get_graph_for_cytoscape
|
||||||
from .runtime import _active_graph_name
|
from .runtime import _active_graph_name
|
||||||
graph = load_graph(_active_graph_name)
|
graph = load_graph(_active_graph_name)
|
||||||
# Include model info from instantiated nodes if runtime exists
|
|
||||||
node_details = {}
|
|
||||||
if _active_runtime:
|
|
||||||
for role, impl_name in graph["nodes"].items():
|
|
||||||
# Find the node instance by role
|
|
||||||
node_inst = getattr(_active_runtime, 'frame_engine', None)
|
|
||||||
if node_inst and hasattr(node_inst, 'nodes'):
|
|
||||||
inst = node_inst.nodes.get(role)
|
|
||||||
if inst:
|
|
||||||
node_details[role] = {
|
|
||||||
"impl": impl_name,
|
|
||||||
"model": getattr(inst, 'model', None) or '',
|
|
||||||
"max_tokens": getattr(inst, 'max_context_tokens', 0),
|
|
||||||
}
|
|
||||||
return {
|
return {
|
||||||
"name": graph["name"],
|
"name": graph["name"],
|
||||||
"description": graph["description"],
|
"description": graph["description"],
|
||||||
"nodes": graph["nodes"],
|
"nodes": graph["nodes"],
|
||||||
"edges": graph["edges"],
|
"edges": graph["edges"],
|
||||||
"node_details": node_details,
|
|
||||||
"cytoscape": get_graph_for_cytoscape(graph),
|
"cytoscape": get_graph_for_cytoscape(graph),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -302,59 +302,9 @@ class FrameEngine:
|
|||||||
expert.send_hud = original_hud
|
expert.send_hud = original_hud
|
||||||
|
|
||||||
thought_summary = (f"response[{len(thought.response)}] tool={thought.tool_used or 'none'} "
|
thought_summary = (f"response[{len(thought.response)}] tool={thought.tool_used or 'none'} "
|
||||||
f"actions={len(thought.actions)} errors={len(thought.errors)}")
|
f"actions={len(thought.actions)}")
|
||||||
has_tool = bool(thought.tool_used and thought.tool_output)
|
has_tool = bool(thought.tool_used and thought.tool_output)
|
||||||
|
|
||||||
# PA retry: if expert failed OR skipped tools when data was needed
|
|
||||||
expectation = self.memorizer.state.get("user_expectation", "conversational")
|
|
||||||
# Detect hallucination: expert returned no tool output for a data job
|
|
||||||
job_needs_data = any(k in (routing.job or "").lower()
|
|
||||||
for k in ["query", "select", "tabelle", "table", "daten", "data",
|
|
||||||
"cost", "kosten", "count", "anzahl", "average", "schnitt",
|
|
||||||
"find", "finde", "show", "zeig", "list", "beschreib"])
|
|
||||||
expert_skipped_tools = not has_tool and not thought.errors and job_needs_data
|
|
||||||
if (thought.errors or expert_skipped_tools) and not has_tool and expectation in ("delegated", "waiting_input", "conversational"):
|
|
||||||
retry_reason = f"{len(thought.errors)} errors" if thought.errors else "no tool calls for data job"
|
|
||||||
self._end_frame(rec, output_summary=thought_summary,
|
|
||||||
route="pa_retry", condition=f"expert_failed ({retry_reason}), expectation={expectation}")
|
|
||||||
await self._send_hud({"node": "runtime", "event": "pa_retry",
|
|
||||||
"detail": f"expert failed: {retry_reason}, retrying via PA"})
|
|
||||||
|
|
||||||
# Stream retry notice to user
|
|
||||||
retry_msg = "Anderer Ansatz..." if routing.language == "de" else "Trying a different approach..."
|
|
||||||
await self.sink.send_delta(retry_msg + "\n")
|
|
||||||
|
|
||||||
# PA reformulates with error context
|
|
||||||
retry_errors = thought.errors if thought.errors else [
|
|
||||||
{"query": "(none)", "error": "Expert produced no database queries. The job requires data lookup but the expert answered without querying. Reformulate with explicit query instructions."}
|
|
||||||
]
|
|
||||||
error_summary = "; ".join(e.get("error", "")[:80] for e in retry_errors[-2:])
|
|
||||||
rec = self._begin_frame(self.frame + 1, "pa_retry",
|
|
||||||
input_summary=f"errors: {error_summary[:100]}")
|
|
||||||
routing2 = await self.nodes["pa"].route_retry(
|
|
||||||
command, self.history, memory_context=mem_ctx,
|
|
||||||
identity=self.identity, channel=self.channel,
|
|
||||||
original_job=routing.job, errors=retry_errors)
|
|
||||||
self._end_frame(rec, output_summary=f"retry_job: {(routing2.job or '')[:60]}",
|
|
||||||
route=f"expert_{routing2.expert}" if routing2.expert != "none" else "output")
|
|
||||||
|
|
||||||
if routing2.expert != "none":
|
|
||||||
expert2 = self._experts.get(routing2.expert, expert)
|
|
||||||
rec = self._begin_frame(self.frame + 1, f"expert_{routing2.expert}_retry",
|
|
||||||
input_summary=f"retry job: {(routing2.job or '')[:80]}")
|
|
||||||
original_hud2 = expert2.send_hud
|
|
||||||
expert2.send_hud = self._make_progress_wrapper(original_hud2, routing2.language)
|
|
||||||
try:
|
|
||||||
thought = await expert2.execute(routing2.job, routing2.language)
|
|
||||||
finally:
|
|
||||||
expert2.send_hud = original_hud2
|
|
||||||
thought_summary = (f"response[{len(thought.response)}] tool={thought.tool_used or 'none'} "
|
|
||||||
f"errors={len(thought.errors)}")
|
|
||||||
has_tool = bool(thought.tool_used and thought.tool_output)
|
|
||||||
self._end_frame(rec, output_summary=thought_summary,
|
|
||||||
route="interpreter" if has_tool else "output+ui")
|
|
||||||
routing = routing2 # use retry routing for rest of pipeline
|
|
||||||
|
|
||||||
# Interpreter (conditional)
|
# Interpreter (conditional)
|
||||||
if self.has_interpreter and has_tool:
|
if self.has_interpreter and has_tool:
|
||||||
self._end_frame(rec, output_summary=thought_summary,
|
self._end_frame(rec, output_summary=thought_summary,
|
||||||
@ -573,7 +523,7 @@ class FrameEngine:
|
|||||||
return self._make_result(result)
|
return self._make_result(result)
|
||||||
|
|
||||||
# Complex action — needs full pipeline
|
# Complex action — needs full pipeline
|
||||||
self._end_frame(rec, output_summary="no local handler", route="pa/director/thinker")
|
self._end_frame(rec, output_summary="no local handler", route="director/thinker")
|
||||||
|
|
||||||
action_desc = f"ACTION: {action}"
|
action_desc = f"ACTION: {action}"
|
||||||
if data:
|
if data:
|
||||||
@ -585,9 +535,7 @@ class FrameEngine:
|
|||||||
analysis=InputAnalysis(intent="action", topic=action, complexity="simple"),
|
analysis=InputAnalysis(intent="action", topic=action, complexity="simple"),
|
||||||
source_text=action_desc)
|
source_text=action_desc)
|
||||||
|
|
||||||
if self.has_pa:
|
if self.has_director:
|
||||||
return await self._run_expert_pipeline(command, mem_ctx, dashboard)
|
|
||||||
elif self.has_director:
|
|
||||||
return await self._run_director_pipeline(command, mem_ctx, dashboard)
|
return await self._run_director_pipeline(command, mem_ctx, dashboard)
|
||||||
else:
|
else:
|
||||||
return await self._run_thinker_pipeline(command, mem_ctx, dashboard)
|
return await self._run_thinker_pipeline(command, mem_ctx, dashboard)
|
||||||
@ -657,10 +605,6 @@ class FrameEngine:
|
|||||||
response, controls = await asyncio.gather(output_task, ui_task)
|
response, controls = await asyncio.gather(output_task, ui_task)
|
||||||
if controls:
|
if controls:
|
||||||
await self.sink.send_controls(controls)
|
await self.sink.send_controls(controls)
|
||||||
# Send artifacts (new system) alongside controls
|
|
||||||
artifacts = self.ui_node.get_artifacts()
|
|
||||||
if artifacts:
|
|
||||||
await self.sink.send_artifacts(artifacts)
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def _check_condition(self, name: str, command: Command = None,
|
def _check_condition(self, name: str, command: Command = None,
|
||||||
@ -678,7 +622,6 @@ class FrameEngine:
|
|||||||
return {
|
return {
|
||||||
"response": response,
|
"response": response,
|
||||||
"controls": self.ui_node.current_controls,
|
"controls": self.ui_node.current_controls,
|
||||||
"artifacts": self.ui_node.get_artifacts(),
|
|
||||||
"memorizer": self.memorizer.state,
|
"memorizer": self.memorizer.state,
|
||||||
"frames": self.frame,
|
"frames": self.frame,
|
||||||
"trace": self.last_trace.to_dict(),
|
"trace": self.last_trace.to_dict(),
|
||||||
|
|||||||
@ -18,9 +18,6 @@ class Node:
|
|||||||
self.context_fill_pct = 0
|
self.context_fill_pct = 0
|
||||||
|
|
||||||
async def hud(self, event: str, **data):
|
async def hud(self, event: str, **data):
|
||||||
# Always include model on context events so frontend knows what model each node uses
|
|
||||||
if event == "context" and self.model:
|
|
||||||
data["model"] = self.model
|
|
||||||
await self.send_hud({"node": self.name, "event": event, **data})
|
await self.send_hud({"node": self.name, "event": event, **data})
|
||||||
|
|
||||||
def trim_context(self, messages: list[dict]) -> list[dict]:
|
def trim_context(self, messages: list[dict]) -> list[dict]:
|
||||||
|
|||||||
@ -1,8 +1,4 @@
|
|||||||
"""Eras Expert: Heizkostenabrechnung domain specialist.
|
"""Eras Expert: heating/energy customer database specialist."""
|
||||||
|
|
||||||
The expert knows the full database schema. No DESCRIBE at runtime.
|
|
||||||
All queries use verified column names and JOIN patterns.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
@ -17,209 +13,55 @@ class ErasExpertNode(ExpertNode):
|
|||||||
name = "eras_expert"
|
name = "eras_expert"
|
||||||
default_database = "eras2_production"
|
default_database = "eras2_production"
|
||||||
|
|
||||||
DOMAIN_SYSTEM = """You are the Eras domain expert for Heizkostenabrechnung (German heating cost billing).
|
DOMAIN_SYSTEM = """You are the Eras expert — specialist for heating and energy customer data.
|
||||||
|
You work with the eras2_production database containing customer, device, and billing data.
|
||||||
|
All table and column names are German (lowercase). Common queries involve customer lookups,
|
||||||
|
device counts, consumption analysis, and billing reports."""
|
||||||
|
|
||||||
BUSINESS CONTEXT:
|
SCHEMA = """Known tables (eras2_production):
|
||||||
Eras is software for Hausverwaltungen and Messdienste who manage properties, meters, and billings.
|
- kunden — customers
|
||||||
The USER of this agent is an Eras customer exploring their data. They think in domain terms
|
- objekte — properties/objects linked to customers
|
||||||
(Kunden, Objekte, Wohnungen, Zaehler) — NOT in SQL. Never expose SQL or table names to the user.
|
- nutzeinheit — usage units within objects
|
||||||
|
- geraete — devices/meters
|
||||||
|
- geraeteverbraeuche — device consumption readings
|
||||||
|
- abrechnungen — billing records
|
||||||
|
|
||||||
DOMAIN MODEL:
|
CRITICAL: You do NOT know the exact column names. They are German and unpredictable.
|
||||||
- Kunden = property managers (Hausverwaltungen). 693 in the system.
|
Your FIRST tool_sequence step for ANY SELECT query MUST be DESCRIBE on the target table.
|
||||||
- Objekte = buildings/Liegenschaften managed by Kunden. 780 total. Linked via objektkunde (m:n).
|
Then use the actual column names from the DESCRIBE result in your SELECT.
|
||||||
- Nutzeinheiten = apartments/units inside Objekte. 4578 total.
|
|
||||||
- Nutzer = tenants/occupants of Nutzeinheiten. 8206 total.
|
|
||||||
- Geraete = measurement devices (Heizkostenverteiler, Zaehler). 56726 total.
|
|
||||||
- Verbraeuche = consumption readings from Geraete. 1.3M readings.
|
|
||||||
- Adressen = postal addresses, linked via objektadressen/kundenadressen.
|
|
||||||
|
|
||||||
RESPOND IN DOMAIN LANGUAGE:
|
Example tool_sequence for "show me 5 customers":
|
||||||
- Say "Kunde Jaeger hat 3 Objekte" not "SELECT COUNT..."
|
[
|
||||||
- Say "12 Wohnungen mit 45 Geraeten" not "nutzeinheit rows"
|
{{"tool": "query_db", "args": {{"query": "DESCRIBE kunden", "database": "eras2_production"}}}},
|
||||||
- Present data as summaries, not raw tables"""
|
{{"tool": "query_db", "args": {{"query": "SELECT * FROM kunden LIMIT 5", "database": "eras2_production"}}}}
|
||||||
|
]"""
|
||||||
|
|
||||||
SCHEMA = """COMPLETE DATABASE SCHEMA (eras2_production) — use these exact column names:
|
def __init__(self, send_hud, process_manager=None):
|
||||||
|
super().__init__(send_hud, process_manager)
|
||||||
|
self._schema_cache: dict[str, str] = {} # table_name -> DESCRIBE result
|
||||||
|
|
||||||
=== kunden (693 rows) ===
|
async def execute(self, job: str, language: str = "de"):
|
||||||
PK: ID (int)
|
"""Execute with schema auto-discovery. Caches DESCRIBE results."""
|
||||||
Name1, Name2, Name3 (longtext) — customer name parts
|
# Inject cached schema into the job context
|
||||||
Kundennummer (longtext) — customer number
|
if self._schema_cache:
|
||||||
AnredeID (FK), BriefanredeID (FK), ZugeordneterKomplettdruckID (FK)
|
schema_ctx = "Known column names from previous DESCRIBE:\n"
|
||||||
Anmerkung, Fremdnummer, Ansprechpartner (longtext)
|
for table, desc in self._schema_cache.items():
|
||||||
Steuernummer, UmsatzsteuerID (longtext)
|
# Just first 5 lines to keep it compact
|
||||||
HatHistorie, IstWebkunde, IstNettoKunde, BrennstoffkostenNachFIFO, BelegePerEmail (bool)
|
lines = desc.strip().split("\n")[:6]
|
||||||
MietpreisAnpassungProzent (decimal)
|
schema_ctx += f"\n{table}:\n" + "\n".join(lines) + "\n"
|
||||||
|
job = job + "\n\n" + schema_ctx
|
||||||
|
|
||||||
=== objektkunde (911 rows) — JUNCTION: kunden ↔ objekte (many-to-many) ===
|
result = await super().execute(job, language)
|
||||||
PK: ID (int)
|
|
||||||
KundeID (FK → kunden.ID)
|
|
||||||
ObjektID (FK → objekte.ID)
|
|
||||||
ZeitraumVon, ZeitraumBis (datetime)
|
|
||||||
IstKunde, IstEigentuemer, IstRechnungsempfaenger, IstAbrechnungsempfaenger (bool)
|
|
||||||
|
|
||||||
=== objekte (780 rows) ===
|
# Cache any DESCRIBE results from this execution
|
||||||
PK: ID (int)
|
# Parse from tool_output if it looks like a DESCRIBE result
|
||||||
Objektnummer (longtext) — building reference number
|
if result.tool_output and "Field\t" in result.tool_output:
|
||||||
AbleserID, MonteurID, UVIRefObjektID, ZugeordneterKomplettdruckID (FK)
|
# Try to identify which table was described
|
||||||
Anmerkung, AnmerkungIntern (longtext)
|
for table in ["kunden", "objekte", "nutzeinheit", "geraete",
|
||||||
HatHistorie, VorauszahlungGetrennt, Selbstablesung, IstObjektFreigegeben (bool)
|
"geraeteverbraeuche", "abrechnungen"]:
|
||||||
|
if table in job.lower() or table in result.tool_output.lower():
|
||||||
|
self._schema_cache[table] = result.tool_output
|
||||||
|
log.info(f"[eras] cached schema for {table}")
|
||||||
|
break
|
||||||
|
|
||||||
=== objektadressen — JUNCTION: objekte ↔ adressen ===
|
return result
|
||||||
PK: ID, ObjektID (FK → objekte.ID), AdresseID (FK → adressen.ID), IstPrimaer (bool)
|
|
||||||
|
|
||||||
=== kundenadressen — JUNCTION: kunden ↔ adressen ===
|
|
||||||
PK: ID, KundeID (FK → kunden.ID), AdresseID (FK → adressen.ID), TypDerAdresseID (FK)
|
|
||||||
|
|
||||||
=== adressen (1762 rows) ===
|
|
||||||
PK: ID (int)
|
|
||||||
Strasse, Hausnummer, Postleitzahl, Ort, Adresszusatz, Postfach (longtext)
|
|
||||||
LandID (FK), Laengengrad, Breitengrad (double)
|
|
||||||
|
|
||||||
=== nutzeinheit (4578 rows) ===
|
|
||||||
PK: ID (int)
|
|
||||||
ObjektID (FK → objekte.ID)
|
|
||||||
NeNummerInt (longtext) — unit number
|
|
||||||
Lage, Stockwerk, Flaeche, Nutzflaeche (various)
|
|
||||||
AdresseID (FK), CustomStatusKeyID (FK)
|
|
||||||
|
|
||||||
=== kundenutzeinheit — JUNCTION: kunden ↔ nutzeinheit ===
|
|
||||||
PK: ID, KundeID (FK → kunden.ID), NutzeinheitID (FK → nutzeinheit.ID), Von, Bis (datetime)
|
|
||||||
|
|
||||||
=== nutzer (8206 rows) — tenants/occupants ===
|
|
||||||
PK: ID (int)
|
|
||||||
NutzeinheitID (FK → nutzeinheit.ID)
|
|
||||||
Name1, Name2, Name3, Name4 (longtext) — tenant name
|
|
||||||
NutzungVon, NutzungBis (datetime)
|
|
||||||
ArtDerNutzung (int), AnredeID (FK), BriefanredeID (FK)
|
|
||||||
IstGesperrt, Selbstableser (bool)
|
|
||||||
|
|
||||||
=== geraete (56726 rows) — meters/devices ===
|
|
||||||
PK: ID (int)
|
|
||||||
NutzeinheitID (FK → nutzeinheit.ID)
|
|
||||||
Geraetenummer (longtext) — device number/serial
|
|
||||||
Bezeichnung (longtext) — device name/label
|
|
||||||
Beschreibung (longtext) — description
|
|
||||||
ArtikelID (FK), NutzergruppenID (FK), Einheit (int)
|
|
||||||
Einbaudatum, Ausbaudatum, GeeichtBis, GeeichtAm, ErstInbetriebnahme, DefektAb (datetime)
|
|
||||||
FirmwareVersion, LaufendeNummer, GruppenKennung, Memo, AllgemeinesMemo (longtext)
|
|
||||||
AnsprechpartnerID, ZugeordneterRaumID, CustomStatusKeyID (FK)
|
|
||||||
Gemietet, Gewartet, KeinAndruck, IstAbzuziehendesGeraet, HatHistorie (bool)
|
|
||||||
|
|
||||||
=== geraeteverbraeuche (1.3M rows) — consumption readings ===
|
|
||||||
PK: ID (int)
|
|
||||||
GeraetID (FK → geraete.ID)
|
|
||||||
Ablesedatum (datetime) — reading date
|
|
||||||
Ablesung (double) — meter reading value
|
|
||||||
Verbrauch (double) — consumption value
|
|
||||||
Faktor (double) — factor
|
|
||||||
Aenderungsdatum (datetime)
|
|
||||||
AbleseartID (FK), Schaetzung (int), Status (int)
|
|
||||||
IstRekonstruiert (bool), Herkunft (int)
|
|
||||||
ManuellerWert (double), Rohablesung (double)
|
|
||||||
Anmerkung, Fehler, Ampullenfarbe (longtext)
|
|
||||||
|
|
||||||
=== auftraege (2960 rows) — billing work orders ===
|
|
||||||
PK: ID (int)
|
|
||||||
AuftragNummer, Bezeichnung (longtext)
|
|
||||||
ErstellDatum, Abgeschlossen (datetime)
|
|
||||||
ZugeordneteAbrechnungsinformationID (FK → abrechnungsinformationen.ID)
|
|
||||||
ErstellMitarbeiterID (FK), AuftragsTyp (int), Status (int)
|
|
||||||
Anmerkung, ObererText, UntererText (longtext)
|
|
||||||
|
|
||||||
=== auftragspositionen (5094 rows) — line items per work order ===
|
|
||||||
PK: ID (int)
|
|
||||||
AuftragID (FK → auftraege.ID)
|
|
||||||
ArtikelID (FK → artikel.ID)
|
|
||||||
SollMenge, IstMenge (int)
|
|
||||||
ZugeordneterGeraeteArtikelID (FK), ZugeordneteVertragPositionID (FK)
|
|
||||||
|
|
||||||
=== artikelposition (70164 rows) — billing line items with prices ===
|
|
||||||
PK: ID (int)
|
|
||||||
ZugewiesenerArtikelID (FK → artikel.ID)
|
|
||||||
ZugewieseneAbrechnungID (FK → abrechnungsinformationen.ID)
|
|
||||||
RechnungID (FK → rechnung.ID)
|
|
||||||
MengeVorgabe, Menge (decimal), NettoVorgabe, Netto (decimal), MWST (decimal)
|
|
||||||
Rechnungsart (int), VorschussBerechnung (bool), ARechnung (bool)
|
|
||||||
VerstecktInNebenkostenID (FK), ZugeordneteVertragPositionID (FK)
|
|
||||||
|
|
||||||
=== artikel (1078 rows) — service/product catalog ===
|
|
||||||
PK: ID (int)
|
|
||||||
Artikelnummer, Bezeichnung (longtext)
|
|
||||||
Netto (decimal), MWST (decimal)
|
|
||||||
BerechnungsZiel (int), UmlageIn (int)
|
|
||||||
ZugeordnetePreislisteID (FK)
|
|
||||||
IstStandard, ARechnung, AppZusatz, IstEigenKostenpos (bool)
|
|
||||||
|
|
||||||
=== rechnung (7356 rows) — invoices ===
|
|
||||||
PK: ID (int)
|
|
||||||
Rechnungsnummer (longtext), Rechnungsart (int)
|
|
||||||
BezahltAm (datetime), BezahlterBetrag (decimal)
|
|
||||||
Druckdatum, Erstelldatum, Exportdatum (datetime)
|
|
||||||
AbrechnungsinformationID (FK → abrechnungsinformationen.ID)
|
|
||||||
AbschlagSummeSonder, AbschlagSummeStandard (decimal)
|
|
||||||
Bankeinzug (bool)
|
|
||||||
|
|
||||||
=== abrechnungsinformationen (4261 rows) — billing periods/settings ===
|
|
||||||
PK: ID (int)
|
|
||||||
Von, Bis (datetime) — billing period
|
|
||||||
AbrechnungHeizung, AbrechnungWarmwasser, AbrechnungNebenkosten, AbrechnungKaltwasser (bool)
|
|
||||||
Tarifabrechnung, BHKW, HeizsaldoInNebenkosten, AbrechnungLegionellen, AbrechnungRauchmelder (bool)
|
|
||||||
|
|
||||||
=== nebenkosten (42209 rows) — ancillary cost items ===
|
|
||||||
PK: ID (int)
|
|
||||||
Von, Bis (datetime)
|
|
||||||
Bezeichnung (longtext), Mwst (decimal), Brutto (decimal)
|
|
||||||
EinheitDerKostenart (longtext), Umlage (int), UmlageZiel (int)
|
|
||||||
ZugeordnetesObjektID (FK → objekte.ID)
|
|
||||||
NurEigentuemer, NurNutzer (bool)
|
|
||||||
|
|
||||||
=== vorauszahlungen (83932 rows) — advance payments per tenant ===
|
|
||||||
PK: ID (int)
|
|
||||||
ZugeordneterNutzerID (FK → nutzer.ID)
|
|
||||||
BetragNebenkosten, BetragHeizkosten, BetragWarmwasser (decimal)
|
|
||||||
Von, Bis (datetime), IstNetto (bool)
|
|
||||||
|
|
||||||
=== heizbetriebskosten (22557 rows) — heating operation costs ===
|
|
||||||
PK: ID (int)
|
|
||||||
Von, Bis (datetime), Bezeichnung (longtext)
|
|
||||||
Mwst (decimal), Brutto (decimal), Art (int)
|
|
||||||
ZugeordnetesObjektID (FK → objekte.ID)
|
|
||||||
ZugeordneteVerbrauchsgruppeID (FK)
|
|
||||||
|
|
||||||
=== brennstofflieferungen (6477 rows) — fuel deliveries ===
|
|
||||||
PK: ID (int)
|
|
||||||
GeliefertAm (datetime), Menge (decimal), Betrag (decimal)
|
|
||||||
Mwst (decimal), Heizwert (decimal)
|
|
||||||
Anfangsstand, Endstand (decimal)
|
|
||||||
ZugeordneterEnergieVerwerterID (FK), BrennstoffMediumID (FK)
|
|
||||||
ZugeordneteAbrechnungsinformationID (FK → abrechnungsinformationen.ID)
|
|
||||||
|
|
||||||
=== vertragpositionen (4395 rows) — contract line items ===
|
|
||||||
PK: ID (int)
|
|
||||||
LaufzeitVon, LaufzeitBis (datetime)
|
|
||||||
Menge (decimal), Gesamtpreis (decimal), PreisProEinheit (decimal), Mwst (decimal)
|
|
||||||
ArtikelID (FK → artikel.ID), VertragNummer (longtext)
|
|
||||||
Art (int), Umlage (int)
|
|
||||||
|
|
||||||
JOIN PATTERNS (use exactly):
|
|
||||||
Kunde → Objekte: JOIN objektkunde ok ON ok.KundeID = k.ID JOIN objekte o ON o.ID = ok.ObjektID
|
|
||||||
Objekt → Adresse: JOIN objektadressen oa ON oa.ObjektID = o.ID JOIN adressen a ON a.ID = oa.AdresseID
|
|
||||||
Kunde → Adresse: JOIN kundenadressen ka ON ka.KundeID = k.ID JOIN adressen a ON a.ID = ka.AdresseID
|
|
||||||
Objekt → NE: JOIN nutzeinheit ne ON ne.ObjektID = o.ID
|
|
||||||
NE → Nutzer: JOIN nutzer nu ON nu.NutzeinheitID = ne.ID
|
|
||||||
NE → Geraete: JOIN geraete g ON g.NutzeinheitID = ne.ID
|
|
||||||
Geraet → Verbrauch: JOIN geraeteverbraeuche gv ON gv.GeraetID = g.ID
|
|
||||||
Auftrag → Positionen: JOIN auftragspositionen ap ON ap.AuftragID = a.ID
|
|
||||||
Auftrag → Abrechnung: JOIN abrechnungsinformationen ai ON ai.ID = a.ZugeordneteAbrechnungsinformationID
|
|
||||||
Artikelpos → Artikel: JOIN artikel art ON art.ID = ap.ZugewiesenerArtikelID
|
|
||||||
Artikelpos → Rechnung: JOIN rechnung r ON r.ID = ap.RechnungID
|
|
||||||
Artikelpos → Abrechnung: JOIN abrechnungsinformationen ai ON ai.ID = ap.ZugewieseneAbrechnungID
|
|
||||||
Nebenkosten → Objekt: JOIN objekte o ON o.ID = nk.ZugeordnetesObjektID
|
|
||||||
Vorauszahlung → Nutzer: JOIN nutzer nu ON nu.ID = vz.ZugeordneterNutzerID
|
|
||||||
|
|
||||||
RULES:
|
|
||||||
- For tables listed above: use ONLY the listed column names. Never guess.
|
|
||||||
- For tables NOT listed above: use SELECT * with LIMIT to discover columns.
|
|
||||||
- If a query fails, the retry system will show you the error. Fix the column name and try again.
|
|
||||||
- Always LIMIT large queries (max 50 rows).
|
|
||||||
- Use LEFT JOIN when results might be empty."""
|
|
||||||
|
|||||||
@ -38,38 +38,28 @@ Given a job description, produce a JSON tool sequence to accomplish it.
|
|||||||
|
|
||||||
Available tools:
|
Available tools:
|
||||||
- query_db(query, database) — SQL SELECT/DESCRIBE/SHOW only
|
- query_db(query, database) — SQL SELECT/DESCRIBE/SHOW only
|
||||||
- emit_actions(actions) — show buttons [{label, action, payload?}]
|
- emit_actions(actions) — show buttons [{{label, action, payload?}}]
|
||||||
- set_state(key, value) — persistent key-value
|
- set_state(key, value) — persistent key-value
|
||||||
- create_machine(id, initial, states) — interactive UI navigation
|
- emit_display(items) — formatted data [{{type, label, value?, style?}}]
|
||||||
- add_state / reset_machine / destroy_machine — machine lifecycle
|
- create_machine(id, initial, states) — interactive UI with navigation
|
||||||
- update_machine(id, data) — update wizard data fields (e.g. {"bundesland": "Bayern"})
|
states: {{"state_name": {{"actions": [...], "display": [...]}}}}
|
||||||
- transition_machine(id, target) — move machine to a specific state
|
- add_state(id, state, buttons, content) — add state to machine
|
||||||
- emit_artifact(type, data, actions?, meta?) — emit a typed workspace artifact:
|
- reset_machine(id) — reset to initial
|
||||||
type="entity_detail": data={title, subtitle?, fields:[{label,value}]}, actions=[{label,action}]
|
- destroy_machine(id) — remove machine
|
||||||
type="data_table": data={title?, columns:[str], rows:[{col:val}]}
|
|
||||||
type="document_page": data={title, sections:[{heading,content}]}
|
|
||||||
type="action_bar": actions=[{label, action, payload?}]
|
|
||||||
type="status": data={label, value?, display_type:"progress"|"info"|"text"}
|
|
||||||
|
|
||||||
PREFERRED: Use emit_artifact for all display output. Legacy emit_card/emit_display still work but emit_artifact is cleaner.
|
|
||||||
Cards are also generated automatically in the response step from query results.
|
|
||||||
|
|
||||||
Output ONLY valid JSON:
|
Output ONLY valid JSON:
|
||||||
{
|
{{
|
||||||
"tool_sequence": [
|
"tool_sequence": [
|
||||||
{"tool": "query_db", "args": {"query": "SELECT ...", "database": "{database}"}}
|
{{"tool": "query_db", "args": {{"query": "SELECT ...", "database": "{database}"}}}},
|
||||||
|
{{"tool": "emit_actions", "args": {{"actions": [{{"label": "...", "action": "..."}}]}}}}
|
||||||
],
|
],
|
||||||
"response_hint": "How to phrase the result"
|
"response_hint": "How to phrase the result for the user"
|
||||||
}
|
}}
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
- NEVER guess column names. Use ONLY columns from the schema.
|
- NEVER guess column names. If unsure, DESCRIBE first.
|
||||||
- Max 5 tools. Keep it focused.
|
- Max 5 tools. Keep it focused.
|
||||||
- For entity details: query all relevant fields, the response step creates the card.
|
- The job is self-contained — all context you need is in the job description."""
|
||||||
- For lists: query multiple rows, the table renders automatically.
|
|
||||||
- The job is self-contained.
|
|
||||||
- NEVER answer data questions without querying the database. You MUST include at least one query_db call for any job that asks about data, counts, costs, or entities. If you are unsure which tables to use, start with DESCRIBE or SELECT * FROM table LIMIT 3 to explore.
|
|
||||||
- An EMPTY tool_sequence is ONLY acceptable if the job explicitly asks for a UI-only action (buttons, machine, display) with no data lookup."""
|
|
||||||
|
|
||||||
RESPONSE_SYSTEM = """You are a domain expert summarizing results for the user.
|
RESPONSE_SYSTEM = """You are a domain expert summarizing results for the user.
|
||||||
|
|
||||||
@ -78,73 +68,37 @@ Rules:
|
|||||||
Job: {job}
|
Job: {job}
|
||||||
{results}
|
{results}
|
||||||
|
|
||||||
Output a JSON object with "text" (response to user) and optionally "card" (structured display):
|
Write a concise, natural response. 1-3 sentences.
|
||||||
|
- Reference specific data from the results.
|
||||||
{
|
- Don't repeat raw output — summarize.
|
||||||
"text": "Concise natural response, 1-3 sentences. Reference data. Match language: {language}.",
|
- Match the language: {language}."""
|
||||||
"card": {
|
|
||||||
"title": "Entity Name or ID",
|
|
||||||
"subtitle": "Type or category",
|
|
||||||
"fields": [{"label": "Field", "value": "actual value from results"}],
|
|
||||||
"actions": [{"label": "Next action", "action": "action_id"}]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Rules:
|
|
||||||
- "text" is REQUIRED. Keep it short.
|
|
||||||
- "card" is OPTIONAL. Include it for single-entity details (Kunde, Objekt, Auftrag).
|
|
||||||
- Card fields must use ACTUAL values from the query results, never templates/placeholders.
|
|
||||||
- For lists of multiple entities, use multiple fields or skip the card.
|
|
||||||
- If no card makes sense, just return {"text": "..."}.
|
|
||||||
- Output ONLY valid JSON."""
|
|
||||||
|
|
||||||
def __init__(self, send_hud, process_manager=None):
|
def __init__(self, send_hud, process_manager=None):
|
||||||
super().__init__(send_hud)
|
super().__init__(send_hud)
|
||||||
|
|
||||||
MAX_RETRIES = 3
|
|
||||||
|
|
||||||
async def execute(self, job: str, language: str = "de") -> ThoughtResult:
|
async def execute(self, job: str, language: str = "de") -> ThoughtResult:
|
||||||
"""Execute a self-contained job with retry on SQL errors.
|
"""Execute a self-contained job. Returns ThoughtResult."""
|
||||||
Expert knows the schema — plan, execute, retry if needed, respond."""
|
|
||||||
await self.hud("thinking", detail=f"planning: {job[:80]}")
|
await self.hud("thinking", detail=f"planning: {job[:80]}")
|
||||||
|
|
||||||
errors_so_far = []
|
# Step 1: Plan tool sequence
|
||||||
tool_sequence = []
|
|
||||||
response_hint = ""
|
|
||||||
|
|
||||||
for attempt in range(1, self.MAX_RETRIES + 1):
|
|
||||||
# Plan (or re-plan with error context)
|
|
||||||
plan_prompt = f"Job: {job}"
|
|
||||||
if errors_so_far:
|
|
||||||
plan_prompt += "\n\nPREVIOUS ATTEMPTS FAILED:\n"
|
|
||||||
for err in errors_so_far:
|
|
||||||
plan_prompt += f"- Query: {err['query']}\n Error: {err['error']}\n"
|
|
||||||
if 'describe' in err:
|
|
||||||
plan_prompt += f" DESCRIBE result: {err['describe'][:300]}\n"
|
|
||||||
plan_prompt += "\nFix the query. If a column was unknown, use the DESCRIBE result above or try SELECT * LIMIT 3 to see actual columns."
|
|
||||||
|
|
||||||
plan_system = self.PLAN_SYSTEM
|
|
||||||
plan_system = plan_system.replace("{domain}", self.DOMAIN_SYSTEM)
|
|
||||||
plan_system = plan_system.replace("{schema}", self.SCHEMA)
|
|
||||||
plan_system = plan_system.replace("{database}", self.default_database)
|
|
||||||
plan_messages = [
|
plan_messages = [
|
||||||
{"role": "system", "content": plan_system},
|
{"role": "system", "content": self.PLAN_SYSTEM.format(
|
||||||
{"role": "user", "content": plan_prompt},
|
domain=self.DOMAIN_SYSTEM, schema=self.SCHEMA,
|
||||||
|
database=self.default_database)},
|
||||||
|
{"role": "user", "content": f"Job: {job}"},
|
||||||
]
|
]
|
||||||
plan_raw = await llm_call(self.model, plan_messages)
|
plan_raw = await llm_call(self.model, plan_messages)
|
||||||
tool_sequence, response_hint = self._parse_plan(plan_raw)
|
tool_sequence, response_hint = self._parse_plan(plan_raw)
|
||||||
await self.hud("planned", tools=len(tool_sequence),
|
|
||||||
hint=response_hint[:80], attempt=attempt)
|
|
||||||
|
|
||||||
# Execute tools
|
await self.hud("planned", tools=len(tool_sequence), hint=response_hint[:80])
|
||||||
|
|
||||||
|
# Step 2: Execute tools
|
||||||
actions = []
|
actions = []
|
||||||
state_updates = {}
|
state_updates = {}
|
||||||
display_items = []
|
display_items = []
|
||||||
machine_ops = []
|
machine_ops = []
|
||||||
artifacts = []
|
|
||||||
tool_used = ""
|
tool_used = ""
|
||||||
tool_output = ""
|
tool_output = ""
|
||||||
had_error = False
|
|
||||||
|
|
||||||
for step in tool_sequence:
|
for step in tool_sequence:
|
||||||
tool = step.get("tool", "")
|
tool = step.get("tool", "")
|
||||||
@ -153,14 +107,6 @@ Rules:
|
|||||||
|
|
||||||
if tool == "emit_actions":
|
if tool == "emit_actions":
|
||||||
actions.extend(args.get("actions", []))
|
actions.extend(args.get("actions", []))
|
||||||
elif tool == "emit_card":
|
|
||||||
card = args.get("card", args)
|
|
||||||
card["type"] = "card"
|
|
||||||
display_items.append(card)
|
|
||||||
elif tool == "emit_list":
|
|
||||||
lst = args.get("list", args)
|
|
||||||
lst["type"] = "list"
|
|
||||||
display_items.append(lst)
|
|
||||||
elif tool == "set_state":
|
elif tool == "set_state":
|
||||||
key = args.get("key", "")
|
key = args.get("key", "")
|
||||||
if key:
|
if key:
|
||||||
@ -175,106 +121,32 @@ Rules:
|
|||||||
machine_ops.append({"op": "reset", **args})
|
machine_ops.append({"op": "reset", **args})
|
||||||
elif tool == "destroy_machine":
|
elif tool == "destroy_machine":
|
||||||
machine_ops.append({"op": "destroy", **args})
|
machine_ops.append({"op": "destroy", **args})
|
||||||
elif tool == "update_machine":
|
|
||||||
machine_ops.append({"op": "update_data", **args})
|
|
||||||
elif tool == "transition_machine":
|
|
||||||
machine_ops.append({"op": "transition", **args})
|
|
||||||
elif tool == "emit_artifact":
|
|
||||||
import uuid
|
|
||||||
artifact = {
|
|
||||||
"id": args.get("id", str(uuid.uuid4())[:8]),
|
|
||||||
"type": args.get("type", "status"),
|
|
||||||
"data": args.get("data", {}),
|
|
||||||
"actions": args.get("actions", []),
|
|
||||||
"meta": args.get("meta", {}),
|
|
||||||
}
|
|
||||||
artifacts.append(artifact)
|
|
||||||
elif tool == "query_db":
|
elif tool == "query_db":
|
||||||
query = args.get("query", "")
|
query = args.get("query", "")
|
||||||
database = args.get("database", self.default_database)
|
database = args.get("database", self.default_database)
|
||||||
try:
|
try:
|
||||||
result = await asyncio.to_thread(run_db_query, query, database)
|
result = await asyncio.to_thread(run_db_query, query, database)
|
||||||
if result.startswith("Error:"):
|
|
||||||
err_entry = {"query": query, "error": result}
|
|
||||||
# Auto-DESCRIBE on column errors to help retry
|
|
||||||
if "Unknown column" in result or "1054" in result:
|
|
||||||
import re
|
|
||||||
# Extract table name from query
|
|
||||||
tables_in_query = re.findall(r'FROM\s+(\w+)|JOIN\s+(\w+)', query, re.IGNORECASE)
|
|
||||||
for match in tables_in_query:
|
|
||||||
tname = match[0] or match[1]
|
|
||||||
if tname:
|
|
||||||
try:
|
|
||||||
desc = await asyncio.to_thread(run_db_query, f"DESCRIBE {tname}", database)
|
|
||||||
err_entry["describe"] = f"{tname}: {desc[:300]}"
|
|
||||||
await self.hud("tool_result", tool="describe",
|
|
||||||
output=f"Auto-DESCRIBE {tname}")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
break
|
|
||||||
errors_so_far.append(err_entry)
|
|
||||||
had_error = True
|
|
||||||
await self.hud("tool_result", tool="query_db",
|
|
||||||
output=f"ERROR (attempt {attempt}): {result[:150]}")
|
|
||||||
break
|
|
||||||
tool_used = "query_db"
|
tool_used = "query_db"
|
||||||
tool_output = result
|
tool_output = result
|
||||||
await self.hud("tool_result", tool="query_db", output=result[:200])
|
await self.hud("tool_result", tool="query_db", output=result[:200])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
errors_so_far.append({"query": query, "error": str(e)})
|
tool_used = "query_db"
|
||||||
had_error = True
|
tool_output = f"Error: {e}"
|
||||||
await self.hud("tool_result", tool="query_db",
|
await self.hud("tool_result", tool="query_db", output=str(e)[:200])
|
||||||
output=f"ERROR (attempt {attempt}): {e}")
|
|
||||||
break
|
|
||||||
|
|
||||||
if not had_error:
|
# Step 3: Generate response
|
||||||
break # success — stop retrying
|
|
||||||
log.info(f"[expert] attempt {attempt} failed, {len(errors_so_far)} errors")
|
|
||||||
|
|
||||||
# Generate response (with whatever we have — success or final error)
|
|
||||||
results_text = ""
|
results_text = ""
|
||||||
if tool_output:
|
if tool_output:
|
||||||
results_text = f"Tool result:\n{tool_output[:500]}"
|
results_text = f"Tool result:\n{tool_output[:500]}"
|
||||||
elif errors_so_far:
|
|
||||||
results_text = f"All {len(errors_so_far)} query attempts failed:\n"
|
|
||||||
for err in errors_so_far[-2:]:
|
|
||||||
results_text += f" {err['error'][:100]}\n"
|
|
||||||
|
|
||||||
resp_system = self.RESPONSE_SYSTEM
|
|
||||||
resp_system = resp_system.replace("{domain}", self.DOMAIN_SYSTEM)
|
|
||||||
resp_system = resp_system.replace("{job}", job)
|
|
||||||
resp_system = resp_system.replace("{results}", results_text)
|
|
||||||
resp_system = resp_system.replace("{language}", language)
|
|
||||||
resp_messages = [
|
resp_messages = [
|
||||||
{"role": "system", "content": resp_system},
|
{"role": "system", "content": self.RESPONSE_SYSTEM.format(
|
||||||
|
domain=self.DOMAIN_SYSTEM, job=job, results=results_text, language=language)},
|
||||||
{"role": "user", "content": job},
|
{"role": "user", "content": job},
|
||||||
]
|
]
|
||||||
raw_response = await llm_call(self.model, resp_messages)
|
response = await llm_call(self.model, resp_messages)
|
||||||
|
if not response:
|
||||||
# Parse JSON response with optional card
|
response = "[no response]"
|
||||||
response = raw_response or "[no response]"
|
|
||||||
try:
|
|
||||||
text = raw_response.strip()
|
|
||||||
if text.startswith("```"):
|
|
||||||
text = text.split("\n", 1)[1] if "\n" in text else text[3:]
|
|
||||||
if text.endswith("```"):
|
|
||||||
text = text[:-3]
|
|
||||||
text = text.strip()
|
|
||||||
resp_data = json.loads(text)
|
|
||||||
response = resp_data.get("text", raw_response)
|
|
||||||
if resp_data.get("artifact"):
|
|
||||||
# New: artifact in response JSON
|
|
||||||
art = resp_data["artifact"]
|
|
||||||
import uuid
|
|
||||||
if "id" not in art:
|
|
||||||
art["id"] = str(uuid.uuid4())[:8]
|
|
||||||
artifacts.append(art)
|
|
||||||
elif resp_data.get("card"):
|
|
||||||
card = resp_data["card"]
|
|
||||||
card["type"] = "card"
|
|
||||||
display_items.append(card)
|
|
||||||
except (json.JSONDecodeError, Exception):
|
|
||||||
pass # Use raw response as text
|
|
||||||
|
|
||||||
await self.hud("done", response=response[:100])
|
await self.hud("done", response=response[:100])
|
||||||
|
|
||||||
@ -286,8 +158,6 @@ Rules:
|
|||||||
state_updates=state_updates,
|
state_updates=state_updates,
|
||||||
display_items=display_items,
|
display_items=display_items,
|
||||||
machine_ops=machine_ops,
|
machine_ops=machine_ops,
|
||||||
errors=errors_so_far,
|
|
||||||
artifacts=artifacts,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _parse_plan(self, raw: str) -> tuple[list, str]:
|
def _parse_plan(self, raw: str) -> tuple[list, str]:
|
||||||
|
|||||||
@ -22,7 +22,7 @@ Listener: {identity} on {channel}
|
|||||||
Return ONLY valid JSON. No markdown, no explanation.
|
Return ONLY valid JSON. No markdown, no explanation.
|
||||||
|
|
||||||
Schema:
|
Schema:
|
||||||
{
|
{{
|
||||||
"who": "name or unknown",
|
"who": "name or unknown",
|
||||||
"language": "en | de | mixed",
|
"language": "en | de | mixed",
|
||||||
"intent": "question | request | social | action | feedback",
|
"intent": "question | request | social | action | feedback",
|
||||||
@ -30,7 +30,7 @@ Schema:
|
|||||||
"tone": "casual | frustrated | playful | urgent",
|
"tone": "casual | frustrated | playful | urgent",
|
||||||
"complexity": "trivial | simple | complex",
|
"complexity": "trivial | simple | complex",
|
||||||
"context": "brief note or empty"
|
"context": "brief note or empty"
|
||||||
}
|
}}
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
- Classify the CURRENT message only. Previous messages are context, not the target.
|
- Classify the CURRENT message only. Previous messages are context, not the target.
|
||||||
@ -53,11 +53,11 @@ Rules:
|
|||||||
casual = neutral
|
casual = neutral
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
"hi there!" -> {"language":"en","intent":"social","tone":"casual","complexity":"trivial"}
|
"hi there!" -> {{"language":"en","intent":"social","tone":"casual","complexity":"trivial"}}
|
||||||
"Wie spaet ist es?" -> {"language":"de","intent":"question","tone":"casual","complexity":"simple"}
|
"Wie spaet ist es?" -> {{"language":"de","intent":"question","tone":"casual","complexity":"simple"}}
|
||||||
"this is broken, nothing works" -> {"language":"en","intent":"feedback","tone":"frustrated","complexity":"simple"}
|
"this is broken, nothing works" -> {{"language":"en","intent":"feedback","tone":"frustrated","complexity":"simple"}}
|
||||||
"create two buttons" -> {"language":"en","intent":"request","tone":"casual","complexity":"simple"}
|
"create two buttons" -> {{"language":"en","intent":"request","tone":"casual","complexity":"simple"}}
|
||||||
"ok thanks bye" -> {"language":"en","intent":"social","tone":"casual","complexity":"trivial"}
|
"ok thanks bye" -> {{"language":"en","intent":"social","tone":"casual","complexity":"trivial"}}
|
||||||
|
|
||||||
{memory_context}"""
|
{memory_context}"""
|
||||||
|
|
||||||
@ -78,9 +78,8 @@ Examples:
|
|||||||
history_summary = "Recent conversation:\n" + "\n".join(lines)
|
history_summary = "Recent conversation:\n" + "\n".join(lines)
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "system", "content": self.SYSTEM.replace(
|
{"role": "system", "content": self.SYSTEM.format(
|
||||||
"{memory_context}", memory_context).replace(
|
memory_context=memory_context, identity=identity, channel=channel)},
|
||||||
"{identity}", identity).replace("{channel}", channel)},
|
|
||||||
]
|
]
|
||||||
if history_summary:
|
if history_summary:
|
||||||
messages.append({"role": "user", "content": history_summary})
|
messages.append({"role": "user", "content": history_summary})
|
||||||
|
|||||||
@ -26,19 +26,6 @@ Given the conversation so far, output a JSON object with these fields:
|
|||||||
- language: string — primary language being used (en, de, mixed)
|
- language: string — primary language being used (en, de, mixed)
|
||||||
- style_hint: string — how Output should talk (casual, formal, technical, poetic, etc.)
|
- style_hint: string — how Output should talk (casual, formal, technical, poetic, etc.)
|
||||||
- facts: list of strings — important facts learned about the user. NEVER drop facts from the existing list unless they are proven wrong. Always include all existing facts plus any new ones.
|
- facts: list of strings — important facts learned about the user. NEVER drop facts from the existing list unless they are proven wrong. Always include all existing facts plus any new ones.
|
||||||
- user_expectation: string — what the user expects the agent to do next. One of:
|
|
||||||
"conversational" — default. User is chatting, asking questions, browsing. Normal back-and-forth.
|
|
||||||
"delegated" — user gave an imperative task ("build X", "do Y", "create Z"). They expect autonomous progress, not clarifying questions.
|
|
||||||
"waiting_input" — agent asked a question or presented choices. User's next message is likely an answer.
|
|
||||||
"observing" — user returned after being idle, or is reviewing a large output. Brief responses, wait for explicit engagement.
|
|
||||||
Cues:
|
|
||||||
- Imperative verbs + task scope ("build", "create", "do", "find") → delegated
|
|
||||||
- Agent ended with "Moment..." / thinking message but user hasn't seen full results yet → delegated (task still in progress)
|
|
||||||
- Short follow-ups like "und?", "ja?", "weiter?", "and?", "so?", "result?", "ergebnis?" → waiting_input (user is waiting for the agent to deliver)
|
|
||||||
- Agent ended with a question ("Sollen wir...?", "Gibt es...?") → waiting_input
|
|
||||||
- User said "ok/thanks/bye/danke" after output → observing
|
|
||||||
- Everything else → conversational
|
|
||||||
IMPORTANT: If the agent just delivered partial results or said "Moment..." and the user sends a short nudge, that is ALWAYS waiting_input, never conversational.
|
|
||||||
|
|
||||||
Output ONLY valid JSON. No explanation, no markdown fences."""
|
Output ONLY valid JSON. No explanation, no markdown fences."""
|
||||||
|
|
||||||
@ -53,7 +40,6 @@ Output ONLY valid JSON. No explanation, no markdown fences."""
|
|||||||
"language": "en",
|
"language": "en",
|
||||||
"style_hint": "casual, technical",
|
"style_hint": "casual, technical",
|
||||||
"facts": [],
|
"facts": [],
|
||||||
"user_expectation": "conversational",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_context_block(self, sensor_lines: list[str] = None, ui_state: dict = None) -> str:
|
def get_context_block(self, sensor_lines: list[str] = None, ui_state: dict = None) -> str:
|
||||||
|
|||||||
@ -34,12 +34,6 @@ YOUR JOB: Transform the Thinker's reasoning into a natural, human-readable text
|
|||||||
- Keep the user's language — if they wrote German, respond in German.
|
- Keep the user's language — if they wrote German, respond in German.
|
||||||
- Be concise. Don't describe data that the UI node will show as a table.
|
- Be concise. Don't describe data that the UI node will show as a table.
|
||||||
|
|
||||||
PHRASING by user_expectation (from memorizer):
|
|
||||||
- "delegated": progress-report style. State what was done and what's next. No questions unless blocked.
|
|
||||||
- "waiting_input": acknowledge the user's answer and continue the flow naturally.
|
|
||||||
- "observing": keep it brief. No unsolicited follow-up questions or suggestions.
|
|
||||||
- "conversational": natural, warm dialogue. Follow-ups are fine.
|
|
||||||
|
|
||||||
{memory_context}"""
|
{memory_context}"""
|
||||||
|
|
||||||
async def process(self, thought: ThoughtResult, history: list[dict],
|
async def process(self, thought: ThoughtResult, history: list[dict],
|
||||||
@ -48,7 +42,7 @@ PHRASING by user_expectation (from memorizer):
|
|||||||
await self.hud("streaming")
|
await self.hud("streaming")
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "system", "content": self.SYSTEM.replace("{memory_context}", memory_context)},
|
{"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
|
||||||
]
|
]
|
||||||
for msg in history[-20:]:
|
for msg in history[-20:]:
|
||||||
messages.append(msg)
|
messages.append(msg)
|
||||||
|
|||||||
@ -27,8 +27,6 @@ Experts have these tools:
|
|||||||
- query_db — SQL queries on their domain database
|
- query_db — SQL queries on their domain database
|
||||||
- emit_actions — create buttons on the dashboard
|
- emit_actions — create buttons on the dashboard
|
||||||
- create_machine / add_state / reset_machine / destroy_machine — interactive UI components
|
- create_machine / add_state / reset_machine / destroy_machine — interactive UI components
|
||||||
- update_machine(id, data) — update wizard data fields on existing machine
|
|
||||||
- transition_machine(id, target) — move machine to a specific state
|
|
||||||
- set_state — persistent key-value store
|
- set_state — persistent key-value store
|
||||||
- emit_display — formatted data display
|
- emit_display — formatted data display
|
||||||
|
|
||||||
@ -38,13 +36,13 @@ YOUR JOB:
|
|||||||
3. Only respond directly for social chat (greetings, thanks, bye, small talk)
|
3. Only respond directly for social chat (greetings, thanks, bye, small talk)
|
||||||
|
|
||||||
Output ONLY valid JSON:
|
Output ONLY valid JSON:
|
||||||
{
|
{{
|
||||||
"expert": "{expert_names} | none",
|
"expert": "{expert_names} | none",
|
||||||
"job": "Self-contained task. Include ALL context — the expert has NO conversation history. Describe what to query, what UI to build, what the user expects to see.",
|
"job": "Self-contained task. Include ALL context — the expert has NO conversation history. Describe what to query, what UI to build, what the user expects to see.",
|
||||||
"thinking_message": "Short message for user while expert works, in their language",
|
"thinking_message": "Short message for user while expert works, in their language",
|
||||||
"response_hint": "If expert=none, your direct response to the user.",
|
"response_hint": "If expert=none, your direct response to the user.",
|
||||||
"language": "de | en | mixed"
|
"language": "de | en | mixed"
|
||||||
}
|
}}
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
- expert=none ONLY for social chat (hi, thanks, bye, how are you)
|
- expert=none ONLY for social chat (hi, thanks, bye, how are you)
|
||||||
@ -55,21 +53,11 @@ Rules:
|
|||||||
- thinking_message: natural, in user's language. e.g. "Moment, ich schaue nach..."
|
- thinking_message: natural, in user's language. e.g. "Moment, ich schaue nach..."
|
||||||
- If the user mentions data, tables, customers, devices, buttons, counters → expert
|
- If the user mentions data, tables, customers, devices, buttons, counters → expert
|
||||||
- When unsure which expert: pick the one whose domain matches best
|
- When unsure which expert: pick the one whose domain matches best
|
||||||
- MACHINE STATE: If there are active machines/wizards listed in the context below, ALWAYS include the machine's current state and stored data in the job. The expert needs this to continue the workflow. Example: "Machine 'angebot_wizard' is on step 'select_age', data: {bundesland: Bayern}. User asks: ..."
|
|
||||||
- If the user asks about their wizard/workflow progress and the info is already visible in the context, respond directly (expert=none) using the machine state from context. Only route to expert if the user needs data queried or tools called.
|
|
||||||
- For update_machine / transition_machine requests: route to expert with the machine ID and operation details in the job.
|
|
||||||
|
|
||||||
USER EXPECTATION (from memorizer):
|
|
||||||
- If user_expectation is "delegated": formulate comprehensive, autonomous jobs. Do NOT include clarifying questions in the job. Tell the expert to proceed and report results.
|
|
||||||
- If user_expectation is "waiting_input": the user is waiting for results or nudging ("und?", "ja?", "weiter?"). Look at conversation history to find what they were waiting for and re-formulate that job. If they answered a question you asked, extract their answer and fold it into context.
|
|
||||||
- If user_expectation is "observing": only route to expert if the user explicitly asks for something. Otherwise respond directly with brief acknowledgment.
|
|
||||||
- If user_expectation is "conversational": normal routing behavior.
|
|
||||||
- CONTINUATION: When user sends a very short message (1-3 words like "und?", "weiter", "ja") after partial/incomplete results, treat it as "continue the previous task". Include the original question and any partial results in the job.
|
|
||||||
|
|
||||||
{memory_context}"""
|
{memory_context}"""
|
||||||
|
|
||||||
EXPERT_DESCRIPTIONS = {
|
EXPERT_DESCRIPTIONS = {
|
||||||
"eras": "eras — Heizkostenabrechnung (German heating cost billing). Users are Hausverwaltungen managing Kunden, Objekte (buildings), Nutzeinheiten (apartments), Geraete (meters), Verbraeuche (readings), Abrechnungen (billings), Auftraege (work orders). Hierarchy: Kunde > Objekte > Nutzeinheiten > Geraete > Verbraeuche. Database: eras2_production. Can also build dashboard UI.",
|
"eras": "eras — heating/energy domain. Database: eras2_production (customers, devices, billing, consumption). Can also build dashboard UI (buttons, machines, counters, tables) for energy data workflows.",
|
||||||
"plankiste": "plankiste — Kita planning domain. Database: plankiste_test (children, care schedules, offers, pricing). Can build dashboard UI for education workflows and generate Angebote.",
|
"plankiste": "plankiste — Kita planning domain. Database: plankiste_test (children, care schedules, offers, pricing). Can build dashboard UI for education workflows and generate Angebote.",
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -101,15 +89,10 @@ USER EXPECTATION (from memorizer):
|
|||||||
expert_lines.append("- (no experts available — handle everything directly)")
|
expert_lines.append("- (no experts available — handle everything directly)")
|
||||||
|
|
||||||
expert_names = " | ".join(self._available_experts) if self._available_experts else "none"
|
expert_names = " | ".join(self._available_experts) if self._available_experts else "none"
|
||||||
# Manual substitution to avoid .format() breaking on curly braces in memory_context
|
|
||||||
system_content = self.SYSTEM
|
|
||||||
system_content = system_content.replace("{memory_context}", memory_context)
|
|
||||||
system_content = system_content.replace("{identity}", identity)
|
|
||||||
system_content = system_content.replace("{channel}", channel)
|
|
||||||
system_content = system_content.replace("{experts}", "\n".join(expert_lines))
|
|
||||||
system_content = system_content.replace("{expert_names}", expert_names)
|
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "system", "content": system_content},
|
{"role": "system", "content": self.SYSTEM.format(
|
||||||
|
memory_context=memory_context, identity=identity, channel=channel,
|
||||||
|
experts="\n".join(expert_lines), expert_names=expert_names)},
|
||||||
]
|
]
|
||||||
|
|
||||||
# Summarize recent history (PA sees full context)
|
# Summarize recent history (PA sees full context)
|
||||||
@ -135,7 +118,7 @@ USER EXPECTATION (from memorizer):
|
|||||||
log.info(f"[pa] raw: {raw[:300]}")
|
log.info(f"[pa] raw: {raw[:300]}")
|
||||||
|
|
||||||
routing = self._parse_routing(raw, command)
|
routing = self._parse_routing(raw, command)
|
||||||
await self.hud("routed", expert=routing.expert, job=(routing.job or "")[:100],
|
await self.hud("routed", expert=routing.expert, job=routing.job[:100],
|
||||||
direct=routing.expert == "none")
|
direct=routing.expert == "none")
|
||||||
|
|
||||||
# Update directive style based on tone
|
# Update directive style based on tone
|
||||||
@ -148,72 +131,6 @@ USER EXPECTATION (from memorizer):
|
|||||||
|
|
||||||
return routing
|
return routing
|
||||||
|
|
||||||
async def route_retry(self, command: Command, history: list[dict],
|
|
||||||
memory_context: str = "", identity: str = "unknown",
|
|
||||||
channel: str = "unknown", original_job: str = "",
|
|
||||||
errors: list = None) -> PARouting:
|
|
||||||
"""Re-route after expert failure. PA reformulates with error context."""
|
|
||||||
await self.hud("thinking", detail="reformulating after expert failure")
|
|
||||||
|
|
||||||
error_lines = []
|
|
||||||
for err in (errors or [])[-3:]:
|
|
||||||
error_lines.append(f"- Query: {err.get('query', '?')[:100]}")
|
|
||||||
error_lines.append(f" Error: {err.get('error', '?')[:100]}")
|
|
||||||
if err.get("describe"):
|
|
||||||
error_lines.append(f" Schema: {err['describe'][:200]}")
|
|
||||||
|
|
||||||
retry_prompt = f"""The expert FAILED the previous job. You must reformulate.
|
|
||||||
|
|
||||||
ORIGINAL JOB: {original_job}
|
|
||||||
|
|
||||||
ERRORS:
|
|
||||||
{chr(10).join(error_lines)}
|
|
||||||
|
|
||||||
REFORMULATE the job with a DIFFERENT approach:
|
|
||||||
- If the query was too complex (JOINs, window functions), break it into simpler steps
|
|
||||||
- If columns were wrong, use the DESCRIBE info above to fix them
|
|
||||||
- If the table structure is unclear, tell the expert to first explore with SELECT * LIMIT 5
|
|
||||||
- Think about what data the user actually needs and find a simpler path to it
|
|
||||||
|
|
||||||
Output the same JSON format as before. The job MUST be different from the original."""
|
|
||||||
|
|
||||||
expert_lines = []
|
|
||||||
for name in self._available_experts:
|
|
||||||
desc = self.EXPERT_DESCRIPTIONS.get(name, f"{name} — domain expert")
|
|
||||||
expert_lines.append(f"- {desc}")
|
|
||||||
expert_names = " | ".join(self._available_experts) if self._available_experts else "none"
|
|
||||||
|
|
||||||
system_content = self.SYSTEM
|
|
||||||
system_content = system_content.replace("{memory_context}", memory_context)
|
|
||||||
system_content = system_content.replace("{identity}", identity)
|
|
||||||
system_content = system_content.replace("{channel}", channel)
|
|
||||||
system_content = system_content.replace("{experts}", "\n".join(expert_lines))
|
|
||||||
system_content = system_content.replace("{expert_names}", expert_names)
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
{"role": "system", "content": system_content},
|
|
||||||
]
|
|
||||||
recent = history[-8:]
|
|
||||||
if recent:
|
|
||||||
lines = []
|
|
||||||
for msg in recent:
|
|
||||||
role = msg.get("role", "?")
|
|
||||||
content = msg.get("content", "")[:200]
|
|
||||||
lines.append(f" {role}: {content}")
|
|
||||||
messages.append({"role": "user", "content": "Recent conversation:\n" + "\n".join(lines)})
|
|
||||||
messages.append({"role": "assistant", "content": "OK, I have the context."})
|
|
||||||
|
|
||||||
messages.append({"role": "user", "content": retry_prompt})
|
|
||||||
messages = self.trim_context(messages)
|
|
||||||
|
|
||||||
raw = await llm_call(self.model, messages)
|
|
||||||
log.info(f"[pa] retry raw: {raw[:300]}")
|
|
||||||
|
|
||||||
routing = self._parse_routing(raw, command)
|
|
||||||
await self.hud("routed", expert=routing.expert, job=(routing.job or "")[:100],
|
|
||||||
direct=routing.expert == "none", retry=True)
|
|
||||||
return routing
|
|
||||||
|
|
||||||
def _parse_routing(self, raw: str, command: Command) -> PARouting:
|
def _parse_routing(self, raw: str, command: Command) -> PARouting:
|
||||||
"""Parse LLM JSON into PARouting with fallback."""
|
"""Parse LLM JSON into PARouting with fallback."""
|
||||||
text = raw.strip()
|
text = raw.strip()
|
||||||
@ -232,10 +149,10 @@ Output the same JSON format as before. The job MUST be different from the origin
|
|||||||
expert = "none"
|
expert = "none"
|
||||||
return PARouting(
|
return PARouting(
|
||||||
expert=expert,
|
expert=expert,
|
||||||
job=data.get("job") or "",
|
job=data.get("job", ""),
|
||||||
thinking_message=data.get("thinking_message") or "",
|
thinking_message=data.get("thinking_message", ""),
|
||||||
response_hint=data.get("response_hint") or "",
|
response_hint=data.get("response_hint", ""),
|
||||||
language=data.get("language") or command.analysis.language,
|
language=data.get("language", command.analysis.language),
|
||||||
)
|
)
|
||||||
except (json.JSONDecodeError, Exception) as e:
|
except (json.JSONDecodeError, Exception) as e:
|
||||||
log.error(f"[pa] parse failed: {e}, raw: {text[:200]}")
|
log.error(f"[pa] parse failed: {e}, raw: {text[:200]}")
|
||||||
|
|||||||
@ -236,7 +236,7 @@ You are one node in a pipeline: Input (perceives) -> You (reason) -> Output (spe
|
|||||||
|
|
||||||
1. emit_actions() — show buttons. Button clicks come back as "ACTION: action_name".
|
1. emit_actions() — show buttons. Button clicks come back as "ACTION: action_name".
|
||||||
Stateful buttons: include var/op in payload (inc/dec/set/toggle). UI handles locally.
|
Stateful buttons: include var/op in payload (inc/dec/set/toggle). UI handles locally.
|
||||||
Example: label:"+1", action:"increment", payload:{"var":"count","op":"inc","initial":0}
|
Example: label:"+1", action:"increment", payload:{{"var":"count","op":"inc","initial":0}}
|
||||||
|
|
||||||
2. set_state(key, value) — persistent key-value store shown as live labels.
|
2. set_state(key, value) — persistent key-value store shown as live labels.
|
||||||
Survives across turns. Use for tracking mode, progress, flags.
|
Survives across turns. Use for tracking mode, progress, flags.
|
||||||
@ -253,9 +253,9 @@ You are one node in a pipeline: Input (perceives) -> You (reason) -> Output (spe
|
|||||||
destroy_machine(id) — remove machine from dashboard.
|
destroy_machine(id) — remove machine from dashboard.
|
||||||
Example — navigation menu:
|
Example — navigation menu:
|
||||||
create_machine(id="nav", initial="main", states=[
|
create_machine(id="nav", initial="main", states=[
|
||||||
{"name":"main","buttons":[{"label":"Menu 1","action":"menu_1","go":"sub1"},{"label":"Menu 2","action":"menu_2","go":"sub2"}],"content":["Welcome"]},
|
{{"name":"main","buttons":[{{"label":"Menu 1","action":"menu_1","go":"sub1"}},{{"label":"Menu 2","action":"menu_2","go":"sub2"}}],"content":["Welcome"]}},
|
||||||
{"name":"sub1","buttons":[{"label":"Back","action":"back","go":"main"}],"content":["Sub 1 details"]},
|
{{"name":"sub1","buttons":[{{"label":"Back","action":"back","go":"main"}}],"content":["Sub 1 details"]}},
|
||||||
{"name":"sub2","buttons":[{"label":"Back","action":"back","go":"main"}],"content":["Sub 2 details"]}
|
{{"name":"sub2","buttons":[{{"label":"Back","action":"back","go":"main"}}],"content":["Sub 2 details"]}}
|
||||||
])
|
])
|
||||||
PREFER machines over emit_actions for anything with navigation or multiple views.
|
PREFER machines over emit_actions for anything with navigation or multiple views.
|
||||||
ALWAYS include states when creating a machine. Never write code — use the tool.
|
ALWAYS include states when creating a machine. Never write code — use the tool.
|
||||||
@ -350,10 +350,10 @@ conn.commit()
|
|||||||
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
|
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
|
||||||
tables = cursor.fetchall()
|
tables = cursor.fetchall()
|
||||||
for t in tables:
|
for t in tables:
|
||||||
cursor.execute(f"SELECT * FROM {t[0]}")
|
cursor.execute(f"SELECT * FROM {{t[0]}}")
|
||||||
rows = cursor.fetchall()
|
rows = cursor.fetchall()
|
||||||
cols = [d[0] for d in cursor.description]
|
cols = [d[0] for d in cursor.description]
|
||||||
print(f"Table: {t[0]}")
|
print(f"Table: {{t[0]}}")
|
||||||
print(" | ".join(cols))
|
print(" | ".join(cols))
|
||||||
for row in rows:
|
for row in rows:
|
||||||
print(" | ".join(str(c) for c in row))
|
print(" | ".join(str(c) for c in row))
|
||||||
@ -446,7 +446,7 @@ conn.close()'''
|
|||||||
await self.hud("thinking", detail="reasoning about response")
|
await self.hud("thinking", detail="reasoning about response")
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "system", "content": self.SYSTEM.replace("{memory_context}", memory_context)},
|
{"role": "system", "content": self.SYSTEM.format(memory_context=memory_context)},
|
||||||
]
|
]
|
||||||
for msg in history[-12:]:
|
for msg in history[-12:]:
|
||||||
messages.append(msg)
|
messages.append(msg)
|
||||||
|
|||||||
@ -88,7 +88,7 @@ Rules:
|
|||||||
hint += f"\nTool result:\n{tool_output[:500]}"
|
hint += f"\nTool result:\n{tool_output[:500]}"
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "system", "content": self.RESPONSE_SYSTEM.replace("{hint}", hint)},
|
{"role": "system", "content": self.RESPONSE_SYSTEM.format(hint=hint)},
|
||||||
]
|
]
|
||||||
for msg in history[-8:]:
|
for msg in history[-8:]:
|
||||||
messages.append(msg)
|
messages.append(msg)
|
||||||
|
|||||||
@ -2,10 +2,9 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import uuid
|
|
||||||
|
|
||||||
from .base import Node
|
from .base import Node
|
||||||
from ..types import ThoughtResult, Artifact
|
from ..types import ThoughtResult
|
||||||
|
|
||||||
log = logging.getLogger("runtime")
|
log = logging.getLogger("runtime")
|
||||||
|
|
||||||
@ -17,7 +16,6 @@ class UINode(Node):
|
|||||||
def __init__(self, send_hud):
|
def __init__(self, send_hud):
|
||||||
super().__init__(send_hud)
|
super().__init__(send_hud)
|
||||||
self.thinker_controls: list[dict] = [] # buttons, labels, tables from Thinker
|
self.thinker_controls: list[dict] = [] # buttons, labels, tables from Thinker
|
||||||
self.artifacts: list[dict] = [] # typed workspace artifacts
|
|
||||||
self.state: dict = {} # {"count": 0, "theme": "dark", ...}
|
self.state: dict = {} # {"count": 0, "theme": "dark", ...}
|
||||||
self.bindings: dict = {} # {"increment": {"op": "inc", "var": "count"}, ...}
|
self.bindings: dict = {} # {"increment": {"op": "inc", "var": "count"}, ...}
|
||||||
self.machines: dict = {} # {"nav": {initial, states, current}, ...}
|
self.machines: dict = {} # {"nav": {initial, states, current}, ...}
|
||||||
@ -81,7 +79,6 @@ class UINode(Node):
|
|||||||
"initial": initial,
|
"initial": initial,
|
||||||
"current": initial,
|
"current": initial,
|
||||||
"states": states,
|
"states": states,
|
||||||
"data": {}, # wizard field storage (e.g. {"bundesland": "Bayern"})
|
|
||||||
}
|
}
|
||||||
log.info(f"[ui] machine created: {mid} (initial={initial}, {len(states)} states)")
|
log.info(f"[ui] machine created: {mid} (initial={initial}, {len(states)} states)")
|
||||||
await self.hud("machine_created", id=mid, initial=initial, state_count=len(states))
|
await self.hud("machine_created", id=mid, initial=initial, state_count=len(states))
|
||||||
@ -107,28 +104,6 @@ class UINode(Node):
|
|||||||
log.info(f"[ui] machine reset: {mid} -> {initial}")
|
log.info(f"[ui] machine reset: {mid} -> {initial}")
|
||||||
await self.hud("machine_reset", id=mid, state=initial)
|
await self.hud("machine_reset", id=mid, state=initial)
|
||||||
|
|
||||||
elif op == "update_data":
|
|
||||||
if mid not in self.machines:
|
|
||||||
log.warning(f"[ui] update_data: machine '{mid}' not found")
|
|
||||||
continue
|
|
||||||
data_update = op_data.get("data", {})
|
|
||||||
self.machines[mid]["data"].update(data_update)
|
|
||||||
log.info(f"[ui] machine data updated: {mid} += {data_update}")
|
|
||||||
await self.hud("machine_data_updated", id=mid, data=data_update)
|
|
||||||
|
|
||||||
elif op == "transition":
|
|
||||||
if mid not in self.machines:
|
|
||||||
log.warning(f"[ui] transition: machine '{mid}' not found")
|
|
||||||
continue
|
|
||||||
target = op_data.get("target", "")
|
|
||||||
if target in self.machines[mid]["states"]:
|
|
||||||
old = self.machines[mid]["current"]
|
|
||||||
self.machines[mid]["current"] = target
|
|
||||||
log.info(f"[ui] machine transition (expert): {mid} {old} -> {target}")
|
|
||||||
await self.hud("machine_transitioned", id=mid, old=old, target=target)
|
|
||||||
else:
|
|
||||||
log.warning(f"[ui] transition target '{target}' not found in {mid}")
|
|
||||||
|
|
||||||
elif op == "destroy":
|
elif op == "destroy":
|
||||||
if mid in self.machines:
|
if mid in self.machines:
|
||||||
del self.machines[mid]
|
del self.machines[mid]
|
||||||
@ -182,31 +157,15 @@ class UINode(Node):
|
|||||||
return controls
|
return controls
|
||||||
|
|
||||||
def get_machine_summary(self) -> str:
|
def get_machine_summary(self) -> str:
|
||||||
"""Rich summary for PA/Thinker context — includes current state details and stored data."""
|
"""Summary for Thinker context — shape only, not full data."""
|
||||||
if not self.machines:
|
if not self.machines:
|
||||||
return ""
|
return ""
|
||||||
parts = []
|
parts = []
|
||||||
for mid, m in self.machines.items():
|
for mid, m in self.machines.items():
|
||||||
current = m["current"]
|
current = m["current"]
|
||||||
state_names = list(m["states"].keys())
|
state_names = list(m["states"].keys())
|
||||||
state_def = m["states"].get(current, {})
|
parts.append(f" machine '{mid}': state={current}, states={state_names}")
|
||||||
line = f" machine '{mid}': state={current}, states={state_names}"
|
return "Machines:\n" + "\n".join(parts)
|
||||||
# Current state content
|
|
||||||
content = state_def.get("content", [])
|
|
||||||
if content:
|
|
||||||
line += f", content={content}"
|
|
||||||
# Current state buttons
|
|
||||||
buttons = state_def.get("buttons", [])
|
|
||||||
if buttons:
|
|
||||||
btn_labels = [b.get("label", b.get("action", "?")) for b in buttons if isinstance(b, dict)]
|
|
||||||
if btn_labels:
|
|
||||||
line += f", buttons={btn_labels}"
|
|
||||||
# Stored wizard data
|
|
||||||
data = m.get("data", {})
|
|
||||||
if data:
|
|
||||||
line += f", data={data}"
|
|
||||||
parts.append(line)
|
|
||||||
return "Active machines (interactive wizard/workflow state):\n" + "\n".join(parts)
|
|
||||||
|
|
||||||
# --- State operations ---
|
# --- State operations ---
|
||||||
|
|
||||||
@ -347,17 +306,12 @@ class UINode(Node):
|
|||||||
"value": str(value),
|
"value": str(value),
|
||||||
})
|
})
|
||||||
|
|
||||||
# 4. Add display items (cards, lists, or simple display)
|
# 4. Add display items from Thinker's emit_display() calls
|
||||||
if thought.display_items:
|
if thought.display_items:
|
||||||
for item in thought.display_items:
|
for item in thought.display_items:
|
||||||
item_type = item.get("type", "text")
|
|
||||||
if item_type in ("card", "list"):
|
|
||||||
# Pass through structured components as-is
|
|
||||||
controls.append(item)
|
|
||||||
else:
|
|
||||||
controls.append({
|
controls.append({
|
||||||
"type": "display",
|
"type": "display",
|
||||||
"display_type": item_type,
|
"display_type": item.get("type", "text"),
|
||||||
"label": item.get("label", ""),
|
"label": item.get("label", ""),
|
||||||
"value": item.get("value", ""),
|
"value": item.get("value", ""),
|
||||||
"style": item.get("style", ""),
|
"style": item.get("style", ""),
|
||||||
@ -384,155 +338,21 @@ class UINode(Node):
|
|||||||
|
|
||||||
return controls
|
return controls
|
||||||
|
|
||||||
def _build_artifacts(self, thought: ThoughtResult) -> list[dict]:
|
|
||||||
"""Convert ThoughtResult into typed artifacts."""
|
|
||||||
arts = []
|
|
||||||
|
|
||||||
# 1. Direct artifacts from expert's emit_artifact calls
|
|
||||||
if thought.artifacts:
|
|
||||||
for a in thought.artifacts:
|
|
||||||
if not a.get("id"):
|
|
||||||
a["id"] = str(uuid.uuid4())[:8]
|
|
||||||
arts.append(a)
|
|
||||||
|
|
||||||
# 2. Convert display_items (cards, lists) → entity_detail artifacts
|
|
||||||
if thought.display_items:
|
|
||||||
for item in thought.display_items:
|
|
||||||
item_type = item.get("type", "text")
|
|
||||||
if item_type == "card":
|
|
||||||
arts.append({
|
|
||||||
"id": str(uuid.uuid4())[:8],
|
|
||||||
"type": "entity_detail",
|
|
||||||
"data": {
|
|
||||||
"title": item.get("title", ""),
|
|
||||||
"subtitle": item.get("subtitle", ""),
|
|
||||||
"fields": item.get("fields", []),
|
|
||||||
},
|
|
||||||
"actions": item.get("actions", []),
|
|
||||||
"meta": {},
|
|
||||||
})
|
|
||||||
elif item_type == "list":
|
|
||||||
arts.append({
|
|
||||||
"id": str(uuid.uuid4())[:8],
|
|
||||||
"type": "entity_detail",
|
|
||||||
"data": {
|
|
||||||
"title": item.get("title", ""),
|
|
||||||
"items": item.get("items", []),
|
|
||||||
},
|
|
||||||
"actions": [],
|
|
||||||
"meta": {"list": True},
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
arts.append({
|
|
||||||
"id": str(uuid.uuid4())[:8],
|
|
||||||
"type": "status",
|
|
||||||
"data": {
|
|
||||||
"display_type": item_type,
|
|
||||||
"label": item.get("label", ""),
|
|
||||||
"value": item.get("value", ""),
|
|
||||||
"style": item.get("style", ""),
|
|
||||||
},
|
|
||||||
"actions": [],
|
|
||||||
"meta": {},
|
|
||||||
})
|
|
||||||
|
|
||||||
# 3. Convert actions → action_bar artifact
|
|
||||||
if thought.actions:
|
|
||||||
btns = self._parse_thinker_actions(thought.actions)
|
|
||||||
arts.append({
|
|
||||||
"id": "action_bar",
|
|
||||||
"type": "action_bar",
|
|
||||||
"data": {},
|
|
||||||
"actions": [{"label": b["label"], "action": b["action"],
|
|
||||||
"payload": b.get("payload", {})} for b in btns],
|
|
||||||
"meta": {},
|
|
||||||
})
|
|
||||||
elif self.thinker_controls:
|
|
||||||
# Preserve existing buttons as action_bar
|
|
||||||
existing_btns = [c for c in self.thinker_controls if c.get("type") == "button"]
|
|
||||||
if existing_btns:
|
|
||||||
arts.append({
|
|
||||||
"id": "action_bar",
|
|
||||||
"type": "action_bar",
|
|
||||||
"data": {},
|
|
||||||
"actions": [{"label": b["label"], "action": b["action"],
|
|
||||||
"payload": b.get("payload", {})} for b in existing_btns],
|
|
||||||
"meta": {},
|
|
||||||
})
|
|
||||||
|
|
||||||
# 4. Convert tool_output table → data_table artifact
|
|
||||||
if thought.tool_output:
|
|
||||||
table = self._extract_table(thought.tool_output)
|
|
||||||
if table:
|
|
||||||
arts.append({
|
|
||||||
"id": str(uuid.uuid4())[:8],
|
|
||||||
"type": "data_table",
|
|
||||||
"data": {
|
|
||||||
"columns": table["columns"],
|
|
||||||
"rows": table["data"],
|
|
||||||
},
|
|
||||||
"actions": [],
|
|
||||||
"meta": {"source": thought.tool_used or "query_db"},
|
|
||||||
})
|
|
||||||
|
|
||||||
# 5. State variables → status artifacts
|
|
||||||
if thought.state_updates:
|
|
||||||
for key, value in thought.state_updates.items():
|
|
||||||
self.set_var(key, value)
|
|
||||||
for var, value in self.state.items():
|
|
||||||
arts.append({
|
|
||||||
"id": f"state_{var}",
|
|
||||||
"type": "status",
|
|
||||||
"data": {"label": var, "value": str(value), "display_type": "text"},
|
|
||||||
"actions": [],
|
|
||||||
"meta": {"state_var": True},
|
|
||||||
})
|
|
||||||
|
|
||||||
# 6. Machines → machine artifacts
|
|
||||||
for mid, machine in self.machines.items():
|
|
||||||
current = machine["current"]
|
|
||||||
state_def = machine["states"].get(current, {})
|
|
||||||
arts.append({
|
|
||||||
"id": f"machine_{mid}",
|
|
||||||
"type": "machine",
|
|
||||||
"data": {
|
|
||||||
"machine_id": mid,
|
|
||||||
"current": current,
|
|
||||||
"states": list(machine["states"].keys()),
|
|
||||||
"content": state_def.get("content", []),
|
|
||||||
"stored_data": machine.get("data", {}),
|
|
||||||
},
|
|
||||||
"actions": [{"label": b.get("label", ""), "action": b.get("action", ""),
|
|
||||||
"go": b.get("go", "")}
|
|
||||||
for b in state_def.get("buttons", []) if isinstance(b, dict)],
|
|
||||||
"meta": {"live": True},
|
|
||||||
})
|
|
||||||
|
|
||||||
return arts
|
|
||||||
|
|
||||||
def get_artifacts(self) -> list[dict]:
|
|
||||||
"""Return current artifact list."""
|
|
||||||
return self.artifacts
|
|
||||||
|
|
||||||
async def process(self, thought: ThoughtResult, history: list[dict],
|
async def process(self, thought: ThoughtResult, history: list[dict],
|
||||||
memory_context: str = "") -> list[dict]:
|
memory_context: str = "") -> list[dict]:
|
||||||
# Apply machine ops first (create/add_state/reset/destroy)
|
# Apply machine ops first (create/add_state/reset/destroy)
|
||||||
if thought.machine_ops:
|
if thought.machine_ops:
|
||||||
await self.apply_machine_ops(thought.machine_ops)
|
await self.apply_machine_ops(thought.machine_ops)
|
||||||
|
|
||||||
# Build artifacts (new system)
|
|
||||||
self.artifacts = self._build_artifacts(thought)
|
|
||||||
|
|
||||||
# Build legacy controls (backward compat)
|
|
||||||
thinker_ctrls = self._build_controls(thought)
|
thinker_ctrls = self._build_controls(thought)
|
||||||
|
|
||||||
if thinker_ctrls:
|
if thinker_ctrls:
|
||||||
self.thinker_controls = thinker_ctrls
|
self.thinker_controls = thinker_ctrls
|
||||||
# Always emit the merged view (thinker + machine)
|
# Always emit the merged view (thinker + machine)
|
||||||
merged = self.current_controls
|
merged = self.current_controls
|
||||||
if merged or self.artifacts:
|
if merged:
|
||||||
await self.hud("controls", controls=merged)
|
await self.hud("controls", controls=merged)
|
||||||
log.info(f"[ui] emitting {len(merged)} controls + {len(self.artifacts)} artifacts")
|
log.info(f"[ui] emitting {len(merged)} controls ({len(self.thinker_controls)} thinker + {len(self.get_machine_controls())} machine)")
|
||||||
else:
|
else:
|
||||||
await self.hud("decided", instruction="no new controls")
|
await self.hud("decided", instruction="no new controls")
|
||||||
|
|
||||||
|
|||||||
@ -17,7 +17,7 @@ log = logging.getLogger("runtime")
|
|||||||
TRACE_FILE = Path(__file__).parent.parent / "trace.jsonl"
|
TRACE_FILE = Path(__file__).parent.parent / "trace.jsonl"
|
||||||
|
|
||||||
# Default graph — can be switched at runtime
|
# Default graph — can be switched at runtime
|
||||||
_active_graph_name = "v4-eras"
|
_active_graph_name = "v1-current"
|
||||||
|
|
||||||
|
|
||||||
class OutputSink:
|
class OutputSink:
|
||||||
@ -56,13 +56,6 @@ class OutputSink:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
async def send_artifacts(self, artifacts: list):
|
|
||||||
if self.ws:
|
|
||||||
try:
|
|
||||||
await self.ws.send_text(json.dumps({"type": "artifacts", "artifacts": artifacts}))
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def send_hud(self, data: dict):
|
async def send_hud(self, data: dict):
|
||||||
if self.ws:
|
if self.ws:
|
||||||
try:
|
try:
|
||||||
@ -228,9 +221,8 @@ class Runtime:
|
|||||||
self.history.append({"role": "user", "content": action_desc})
|
self.history.append({"role": "user", "content": action_desc})
|
||||||
|
|
||||||
sensor_lines = self.sensor.get_context_lines()
|
sensor_lines = self.sensor.get_context_lines()
|
||||||
director_line = self.director.get_context_line() if self.director else ""
|
director_line = self.director.get_context_line()
|
||||||
mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines, ui_state=self.ui_node.state)
|
mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines, ui_state=self.ui_node.state)
|
||||||
if director_line:
|
|
||||||
mem_ctx += f"\n\n{director_line}"
|
mem_ctx += f"\n\n{director_line}"
|
||||||
|
|
||||||
command = Command(
|
command = Command(
|
||||||
@ -250,7 +242,7 @@ class Runtime:
|
|||||||
self.history.append({"role": "assistant", "content": response})
|
self.history.append({"role": "assistant", "content": response})
|
||||||
|
|
||||||
await self.memorizer.update(self.history)
|
await self.memorizer.update(self.history)
|
||||||
if not self.is_v2 and self.director:
|
if not self.is_v2:
|
||||||
await self.director.update(self.history, self.memorizer.state)
|
await self.director.update(self.history, self.memorizer.state)
|
||||||
|
|
||||||
if len(self.history) > self.MAX_HISTORY:
|
if len(self.history) > self.MAX_HISTORY:
|
||||||
@ -327,9 +319,8 @@ class Runtime:
|
|||||||
# Check Sensor flags (idle return, workspace mismatch)
|
# Check Sensor flags (idle return, workspace mismatch)
|
||||||
sensor_flags = self.sensor.consume_flags()
|
sensor_flags = self.sensor.consume_flags()
|
||||||
sensor_lines = self.sensor.get_context_lines()
|
sensor_lines = self.sensor.get_context_lines()
|
||||||
director_line = self.director.get_context_line() if self.director else ""
|
director_line = self.director.get_context_line()
|
||||||
mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines, ui_state=self.ui_node.state)
|
mem_ctx = self.memorizer.get_context_block(sensor_lines=sensor_lines, ui_state=self.ui_node.state)
|
||||||
if director_line:
|
|
||||||
mem_ctx += f"\n\n{director_line}"
|
mem_ctx += f"\n\n{director_line}"
|
||||||
machine_summary = self.ui_node.get_machine_summary()
|
machine_summary = self.ui_node.get_machine_summary()
|
||||||
if machine_summary:
|
if machine_summary:
|
||||||
|
|||||||
@ -76,19 +76,6 @@ class PARouting:
|
|||||||
language: str = "de" # Response language
|
language: str = "de" # Response language
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Artifact:
|
|
||||||
"""A typed workspace item. The unit of workspace content."""
|
|
||||||
id: str # unique ID
|
|
||||||
type: str # entity_detail | data_table | document_page | action_bar | status
|
|
||||||
data: dict = field(default_factory=dict) # type-specific payload
|
|
||||||
actions: list = field(default_factory=list) # [{label, action, payload?}]
|
|
||||||
meta: dict = field(default_factory=dict) # {entity?, related?, source_query?}
|
|
||||||
|
|
||||||
def to_dict(self) -> dict:
|
|
||||||
return asdict(self)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ThoughtResult:
|
class ThoughtResult:
|
||||||
"""Thinker node's output — either a direct answer or tool results."""
|
"""Thinker node's output — either a direct answer or tool results."""
|
||||||
@ -99,5 +86,3 @@ class ThoughtResult:
|
|||||||
state_updates: dict = field(default_factory=dict) # {key: value} from set_state
|
state_updates: dict = field(default_factory=dict) # {key: value} from set_state
|
||||||
display_items: list = field(default_factory=list) # [{type, label, value?, style?}] from emit_display
|
display_items: list = field(default_factory=list) # [{type, label, value?, style?}] from emit_display
|
||||||
machine_ops: list = field(default_factory=list) # [{op, id, ...}] from machine tools
|
machine_ops: list = field(default_factory=list) # [{op, id, ...}] from machine tools
|
||||||
errors: list = field(default_factory=list) # [{query, error, describe?}] from failed retries
|
|
||||||
artifacts: list = field(default_factory=list) # [Artifact] from emit_artifact
|
|
||||||
|
|||||||
@ -255,24 +255,14 @@ def check_actions(actions: list, check: str) -> tuple[bool, str]:
|
|||||||
return True, f"{len(actions)} actions >= {expected}"
|
return True, f"{len(actions)} actions >= {expected}"
|
||||||
return False, f"{len(actions)} actions < {expected}"
|
return False, f"{len(actions)} actions < {expected}"
|
||||||
|
|
||||||
# has TYPE or has TYPE1 or TYPE2
|
# has table
|
||||||
m = re.match(r'has\s+(.+)', check)
|
if check.strip() == "has table":
|
||||||
if m:
|
|
||||||
types = [t.strip() for t in m.group(1).split(" or has ")]
|
|
||||||
# Also handle "card or has table" → ["card", "table"]
|
|
||||||
types = [t.replace("has ", "") for t in types]
|
|
||||||
for a in actions:
|
for a in actions:
|
||||||
if isinstance(a, dict) and a.get("type") in types:
|
if isinstance(a, dict) and a.get("type") == "table":
|
||||||
atype = a.get("type")
|
cols = a.get("columns", [])
|
||||||
if atype == "table":
|
rows = len(a.get("data", []))
|
||||||
return True, f"table found: {len(a.get('columns', []))} cols, {len(a.get('data', []))} rows"
|
return True, f"table found: {len(cols)} cols, {rows} rows"
|
||||||
elif atype == "card":
|
return False, f"no table in {len(actions)} controls"
|
||||||
return True, f"card found: {a.get('title', '?')}, {len(a.get('fields', []))} fields"
|
|
||||||
elif atype == "list":
|
|
||||||
return True, f"list found: {a.get('title', '?')}, {len(a.get('items', []))} items"
|
|
||||||
else:
|
|
||||||
return True, f"{atype} found"
|
|
||||||
return False, f"no {' or '.join(types)} in {len(actions)} controls ({[a.get('type','?') for a in actions if isinstance(a, dict)]})"
|
|
||||||
|
|
||||||
# any action contains "foo" or "bar" — searches buttons only
|
# any action contains "foo" or "bar" — searches buttons only
|
||||||
m = re.match(r'any action contains\s+"?(.+?)"?\s*$', check)
|
m = re.match(r'any action contains\s+"?(.+?)"?\s*$', check)
|
||||||
@ -382,12 +372,6 @@ def check_trace(trace: list, check: str) -> tuple[bool, str]:
|
|||||||
return True, f"found reset_machine via machine_reset event"
|
return True, f"found reset_machine via machine_reset event"
|
||||||
if t.get("event") == "machine_destroyed" and tool_name == "destroy_machine":
|
if t.get("event") == "machine_destroyed" and tool_name == "destroy_machine":
|
||||||
return True, f"found destroy_machine via machine_destroyed event"
|
return True, f"found destroy_machine via machine_destroyed event"
|
||||||
if t.get("event") == "machine_data_updated" and tool_name == "update_machine":
|
|
||||||
return True, f"found update_machine via machine_data_updated event"
|
|
||||||
if t.get("event") == "machine_transitioned" and tool_name == "transition_machine":
|
|
||||||
return True, f"found transition_machine via machine_transitioned event"
|
|
||||||
if t.get("event") == "pa_retry" and tool_name == "pa_retry":
|
|
||||||
return True, f"found pa_retry event"
|
|
||||||
return False, f"no tool_call '{tool_name}' in trace"
|
return False, f"no tool_call '{tool_name}' in trace"
|
||||||
|
|
||||||
# machine_created id="NAV" — checks for specific machine creation
|
# machine_created id="NAV" — checks for specific machine creation
|
||||||
|
|||||||
@ -953,24 +953,6 @@ function send() {
|
|||||||
inputEl.value = '';
|
inputEl.value = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
async function clearSession() {
|
|
||||||
try {
|
|
||||||
const headers = { 'Content-Type': 'application/json' };
|
|
||||||
if (authToken) headers['Authorization'] = 'Bearer ' + authToken;
|
|
||||||
await fetch('/api/clear', { method: 'POST', headers });
|
|
||||||
// Clear UI
|
|
||||||
msgs.innerHTML = '';
|
|
||||||
traceEl.innerHTML = '';
|
|
||||||
_currentDashboard = [];
|
|
||||||
currentEl = null;
|
|
||||||
const dock = document.getElementById('dock');
|
|
||||||
if (dock) dock.innerHTML = '';
|
|
||||||
addTrace('runtime', 'cleared', 'session reset');
|
|
||||||
} catch (e) {
|
|
||||||
addTrace('runtime', 'error', 'clear failed: ' + e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Awareness panel updates ---
|
// --- Awareness panel updates ---
|
||||||
|
|
||||||
let _sensorReadings = {};
|
let _sensorReadings = {};
|
||||||
|
|||||||
@ -16,8 +16,6 @@
|
|||||||
<h1>cog</h1>
|
<h1>cog</h1>
|
||||||
<div id="test-status"></div>
|
<div id="test-status"></div>
|
||||||
<div style="flex:1"></div>
|
<div style="flex:1"></div>
|
||||||
<div id="graph-switcher"></div>
|
|
||||||
<button onclick="clearSession()" class="btn-top" title="Clear session">Clear</button>
|
|
||||||
<div id="status">disconnected</div>
|
<div id="status">disconnected</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -29,7 +27,17 @@
|
|||||||
</div>
|
</div>
|
||||||
<div class="panel detail-panel">
|
<div class="panel detail-panel">
|
||||||
<div class="panel-header detail-h">Nodes</div>
|
<div class="panel-header detail-h">Nodes</div>
|
||||||
<div id="node-metrics"></div>
|
<div id="node-metrics">
|
||||||
|
<div class="node-meter" id="meter-input"><span class="nm-label">input</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
|
||||||
|
<div class="node-meter" id="meter-director_v2"><span class="nm-label">director</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
|
||||||
|
<div class="node-meter" id="meter-pa_v1"><span class="nm-label">PA</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
|
||||||
|
<div class="node-meter" id="meter-thinker"><span class="nm-label">thinker</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
|
||||||
|
<div class="node-meter" id="meter-eras_expert"><span class="nm-label">eras</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
|
||||||
|
<div class="node-meter" id="meter-output"><span class="nm-label">output</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
|
||||||
|
<div class="node-meter" id="meter-memorizer"><span class="nm-label">memo</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
|
||||||
|
<div class="node-meter" id="meter-interpreter"><span class="nm-label">interp</span><div class="nm-bar"><div class="nm-fill"></div></div><span class="nm-text"></span></div>
|
||||||
|
<div class="node-meter" id="meter-sensor"><span class="nm-label">sensor</span><span class="nm-text" style="flex:1"></span></div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="panel graph-panel">
|
<div class="panel graph-panel">
|
||||||
<div class="panel-header graph-h">Graph
|
<div class="panel-header graph-h">Graph
|
||||||
@ -50,6 +58,7 @@
|
|||||||
<div id="input-bar">
|
<div id="input-bar">
|
||||||
<input id="input" placeholder="Type a message..." autocomplete="off">
|
<input id="input" placeholder="Type a message..." autocomplete="off">
|
||||||
<button onclick="send()">Send</button>
|
<button onclick="send()">Send</button>
|
||||||
|
<button onclick="clearSession()" class="btn-clear" title="Clear session">✕</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="panel awareness-panel">
|
<div class="panel awareness-panel">
|
||||||
|
|||||||
@ -1,178 +1,15 @@
|
|||||||
/** Awareness panel: memorizer state, sensor readings.
|
/** Awareness panel: memorizer state, sensor readings, node meters. */
|
||||||
* Node detail panel: per-node model, tokens, progress, last event.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { esc, truncate } from './util.js';
|
import { esc, truncate } from './util.js';
|
||||||
|
|
||||||
let _sensorReadings = {};
|
let _sensorReadings = {};
|
||||||
|
|
||||||
// --- Node state tracker ---
|
|
||||||
const _nodeState = {}; // { nodeName: { model, tokens, maxTokens, fillPct, lastEvent, lastDetail, status, toolCalls, startedAt } }
|
|
||||||
|
|
||||||
// Normalize node names to avoid duplicates (pa_v1→pa, expert_eras→eras, etc.)
|
|
||||||
function _normName(name) {
|
|
||||||
return name.replace('_v1', '').replace('_v2', '').replace('expert_', '');
|
|
||||||
}
|
|
||||||
|
|
||||||
function _getNode(name) {
|
|
||||||
const key = _normName(name);
|
|
||||||
if (!_nodeState[key]) {
|
|
||||||
_nodeState[key] = {
|
|
||||||
model: '', tokens: 0, maxTokens: 0, fillPct: 0,
|
|
||||||
lastEvent: '', lastDetail: '', status: 'idle',
|
|
||||||
toolCalls: 0, lastTool: '',
|
|
||||||
};
|
|
||||||
}
|
|
||||||
return _nodeState[key];
|
|
||||||
}
|
|
||||||
|
|
||||||
export function updateNodeFromHud(node, event, data) {
|
|
||||||
const n = _getNode(node);
|
|
||||||
|
|
||||||
if (event === 'context') {
|
|
||||||
if (data.model) n.model = data.model.replace('google/', '').replace('anthropic/', '');
|
|
||||||
if (data.tokens !== undefined) n.tokens = data.tokens;
|
|
||||||
if (data.max_tokens !== undefined) n.maxTokens = data.max_tokens;
|
|
||||||
if (data.fill_pct !== undefined) n.fillPct = data.fill_pct;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (event === 'thinking') {
|
|
||||||
n.status = 'thinking';
|
|
||||||
n.lastEvent = 'thinking';
|
|
||||||
n.lastDetail = data.detail || '';
|
|
||||||
} else if (event === 'perceived') {
|
|
||||||
n.status = 'done';
|
|
||||||
n.lastEvent = 'perceived';
|
|
||||||
const a = data.analysis || {};
|
|
||||||
n.lastDetail = `${a.intent || '?'}/${a.language || '?'}/${a.tone || '?'}`;
|
|
||||||
} else if (event === 'decided' || event === 'routed') {
|
|
||||||
n.status = 'done';
|
|
||||||
n.lastEvent = event;
|
|
||||||
n.lastDetail = data.goal || data.instruction || data.job || '';
|
|
||||||
} else if (event === 'tool_call') {
|
|
||||||
n.status = 'tool';
|
|
||||||
n.lastEvent = 'tool_call';
|
|
||||||
n.lastTool = data.tool || '';
|
|
||||||
n.lastDetail = data.tool || '';
|
|
||||||
n.toolCalls++;
|
|
||||||
} else if (event === 'tool_result') {
|
|
||||||
n.lastEvent = 'tool_result';
|
|
||||||
n.lastDetail = truncate(data.output || '', 50);
|
|
||||||
} else if (event === 'streaming') {
|
|
||||||
n.status = 'streaming';
|
|
||||||
n.lastEvent = 'streaming';
|
|
||||||
} else if (event === 'done') {
|
|
||||||
n.status = 'done';
|
|
||||||
n.lastEvent = 'done';
|
|
||||||
} else if (event === 'updated') {
|
|
||||||
n.status = 'done';
|
|
||||||
n.lastEvent = 'updated';
|
|
||||||
} else if (event === 'planned') {
|
|
||||||
n.status = 'planned';
|
|
||||||
n.lastEvent = 'planned';
|
|
||||||
n.lastDetail = `${data.tools || 0} tools`;
|
|
||||||
} else if (event === 'interpreted') {
|
|
||||||
n.status = 'done';
|
|
||||||
n.lastEvent = 'interpreted';
|
|
||||||
n.lastDetail = truncate(data.summary || '', 50);
|
|
||||||
}
|
|
||||||
|
|
||||||
renderNodes();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fixed pipeline order — no re-sorting
|
|
||||||
// Fixed pipeline order using normalized names
|
|
||||||
const PIPELINE_ORDER = ['input', 'pa', 'director', 'eras', 'plankiste',
|
|
||||||
'thinker', 'interpreter', 'output', 'memorizer', 'ui', 'sensor'];
|
|
||||||
|
|
||||||
/**
 * Rebuild the #node-metrics panel from _nodeState.
 *
 * Emits one card per tracked node, ordered by PIPELINE_ORDER. The 'runtime'
 * and 'frame_engine' entries are never shown. Does nothing when the panel
 * element is not in the document.
 */
function renderNodes() {
  const panel = document.getElementById('node-metrics');
  if (!panel) return;

  // Unknown names get rank 99 so they land after every known stage.
  const rank = (nodeName) => {
    const idx = PIPELINE_ORDER.indexOf(nodeName);
    return idx === -1 ? 99 : idx;
  };

  const visible = Object.entries(_nodeState)
    .filter(([nodeName]) => !(nodeName === 'runtime' || nodeName === 'frame_engine'))
    .sort(([nameA], [nameB]) => rank(nameA) - rank(nameB));

  const cards = visible.map(([nodeName, node]) => {
    let statusClass = '';
    if (node.status === 'thinking' || node.status === 'tool') {
      statusClass = 'nm-active';
    } else if (node.status === 'streaming') {
      statusClass = 'nm-streaming';
    }

    // Strip version / expert markers for the card title.
    const label = nodeName.replace('_v1', '').replace('_v2', '').replace('expert_', '');
    // Keep only the last path segment of the model id and shorten it.
    const model = node.model
      ? node.model.split('/').pop().replace('-001', '').replace('-4.5', '4.5')
      : '';
    const tokens = node.maxTokens ? `${node.tokens}/${node.maxTokens}t` : '';
    const fill = node.fillPct || 0;
    const detail = node.lastDetail ? truncate(node.lastDetail, 45) : '';
    const calls = node.toolCalls > 0 ? ` [${node.toolCalls} calls]` : '';

    return `<div class="node-card ${statusClass}">
      <div class="nc-header">
        <span class="nc-name">${esc(label)}</span>
        <span class="nc-model">${esc(model)}</span>
        <span class="nc-tokens">${esc(tokens)}</span>
      </div>
      <div class="nc-bar"><div class="nc-fill" style="width:${fill}%"></div></div>
      <div class="nc-status">
        <span class="nc-event">${esc(node.lastEvent)}</span>
        <span class="nc-detail">${esc(detail)}${esc(calls)}</span>
      </div>
    </div>`;
  });

  panel.innerHTML = cards.join('');
}
|
|
||||||
|
|
||||||
/**
 * Seed the node cards from a graph definition so they are visible before any
 * message has arrived.
 *
 * @param {object} graphData - graph payload; reads `nodes` (keyed by role) and
 *   the optional `node_details` (role -> { model, max_tokens }).
 */
export function initNodesFromGraph(graphData) {
  const roleMap = graphData.nodes || {};
  const detailMap = graphData.node_details || {};

  Object.keys(roleMap).forEach((role) => {
    const node = _getNode(role);
    const info = detailMap[role];
    if (info) {
      // Show the model id without its 'google/' or 'anthropic/' prefix.
      node.model = (info.model || '').replace('google/', '').replace('anthropic/', '');
      node.maxTokens = info.max_tokens || 0;
    }
    node.lastEvent = 'idle';
    node.status = 'idle';
  });

  renderNodes();
}
|
|
||||||
|
|
||||||
/** Forget every tracked node and empty the #node-metrics panel if present. */
export function clearNodes() {
  // Delete keys in place so existing references to _nodeState stay valid.
  Object.keys(_nodeState).forEach((name) => {
    delete _nodeState[name];
  });

  const panel = document.getElementById('node-metrics');
  if (panel) panel.innerHTML = '';
}
|
|
||||||
|
|
||||||
// Keep old meter function for backward compat (called from ws.js)
|
|
||||||
/**
 * Record a node's token usage on its state entry and redraw the cards.
 *
 * @param {string} node - node name passed to _getNode().
 * @param {number} tokens - tokens currently used.
 * @param {number} maxTokens - token budget.
 * @param {number} fillPct - fill level, used as the bar width in percent.
 */
export function updateMeter(node, tokens, maxTokens, fillPct) {
  const entry = _getNode(node);
  Object.assign(entry, { tokens, maxTokens, fillPct });
  renderNodes();
}
|
|
||||||
|
|
||||||
// --- Awareness: memorizer state ---
|
|
||||||
|
|
||||||
export function updateAwarenessState(state) {
|
export function updateAwarenessState(state) {
|
||||||
const body = document.getElementById('aw-state-body');
|
const body = document.getElementById('aw-state-body');
|
||||||
if (!body) return;
|
if (!body) return;
|
||||||
const expectation = state.user_expectation || 'conversational';
|
|
||||||
const expClass = {
|
|
||||||
conversational: 'aw-exp-conv',
|
|
||||||
delegated: 'aw-exp-deleg',
|
|
||||||
waiting_input: 'aw-exp-wait',
|
|
||||||
observing: 'aw-exp-obs',
|
|
||||||
}[expectation] || '';
|
|
||||||
const display = [
|
const display = [
|
||||||
['user', state.user_name],
|
['user', state.user_name],
|
||||||
['mood', state.user_mood],
|
['mood', state.user_mood],
|
||||||
['expectation', expectation, expClass],
|
|
||||||
['topic', state.topic],
|
['topic', state.topic],
|
||||||
['lang', state.language],
|
['lang', state.language],
|
||||||
['style', state.style_hint],
|
['style', state.style_hint],
|
||||||
@ -181,8 +18,8 @@ export function updateAwarenessState(state) {
|
|||||||
const facts = state.facts || [];
|
const facts = state.facts || [];
|
||||||
const history = state.topic_history || [];
|
const history = state.topic_history || [];
|
||||||
|
|
||||||
let html = display.map(([k, v, cls]) =>
|
let html = display.map(([k, v]) =>
|
||||||
`<div class="aw-row"><span class="aw-key">${esc(k)}</span><span class="aw-val ${cls || ''}">${esc(v || 'null')}</span></div>`
|
`<div class="aw-row"><span class="aw-key">${esc(k)}</span><span class="aw-val">${esc(v || 'null')}</span></div>`
|
||||||
).join('');
|
).join('');
|
||||||
|
|
||||||
if (facts.length) {
|
if (facts.length) {
|
||||||
@ -196,8 +33,6 @@ export function updateAwarenessState(state) {
|
|||||||
body.innerHTML = html;
|
body.innerHTML = html;
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Awareness: sensor readings ---
|
|
||||||
|
|
||||||
export function updateAwarenessSensors(tick, deltas) {
|
export function updateAwarenessSensors(tick, deltas) {
|
||||||
const body = document.getElementById('aw-sensor-body');
|
const body = document.getElementById('aw-sensor-body');
|
||||||
if (!body) return;
|
if (!body) return;
|
||||||
@ -211,3 +46,12 @@ export function updateAwarenessSensors(tick, deltas) {
|
|||||||
}
|
}
|
||||||
body.innerHTML = html;
|
body.innerHTML = html;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Update the meter element `#meter-<node>` in place.
 *
 * Sets the width of its `.nm-bar` child and the text of its `.nm-text` child.
 * Missing elements are skipped silently.
 */
export function updateMeter(node, tokens, maxTokens, fillPct) {
  const meterEl = document.getElementById('meter-' + node);
  if (!meterEl) return;

  const barEl = meterEl.querySelector('.nm-bar');
  const labelEl = meterEl.querySelector('.nm-text');

  if (barEl) {
    barEl.style.width = fillPct + '%';
  }
  if (labelEl) {
    labelEl.textContent = `${tokens}/${maxTokens}t`;
  }
}
|
||||||
|
|||||||
@ -1,9 +1,6 @@
|
|||||||
/** Dashboard: workspace artifact + control rendering.
|
/** Dashboard: workspace controls rendering (buttons, tables, labels, displays, machines). */
|
||||||
* Artifact system: typed artifacts (entity_detail, data_table, document_page, action_bar, status, machine).
|
|
||||||
* Legacy: dockControls() still works as fallback for old control format.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { esc, renderMarkdown } from './util.js';
|
import { esc } from './util.js';
|
||||||
import { addTrace } from './trace.js';
|
import { addTrace } from './trace.js';
|
||||||
import { setDashboard } from './chat.js';
|
import { setDashboard } from './chat.js';
|
||||||
|
|
||||||
@ -11,233 +8,8 @@ let _ws = null;
|
|||||||
|
|
||||||
export function setWs(ws) { _ws = ws; }
|
export function setWs(ws) { _ws = ws; }
|
||||||
|
|
||||||
/**
 * Send an `action` message over the dashboard WebSocket and trace it.
 *
 * Silently does nothing unless a socket is set and its readyState is 1.
 *
 * @param {string} action - action identifier.
 * @param {object} [data] - payload; an empty object is sent when falsy.
 */
function _sendAction(action, data) {
  const socketOpen = _ws && _ws.readyState === 1;
  if (!socketOpen) return;

  const message = { type: 'action', action, data: data || {} };
  _ws.send(JSON.stringify(message));
  addTrace('runtime', 'action', action);
}
|
|
||||||
|
|
||||||
// --- Artifact system ---
|
|
||||||
|
|
||||||
/**
 * Replace the contents of #workspace-body with one wrapper per artifact.
 *
 * Each artifact is drawn by the renderer registered for its `type` in
 * RENDERERS; unknown types fall back to an escaped JSON dump of `art.data`.
 * The actions of all artifacts are then flattened into button controls and
 * handed to setDashboard().
 *
 * @param {Array<object>} artifacts - artifact list; anything that is not an
 *   array is treated as an empty list.
 */
export function dockArtifacts(artifacts) {
  const body = document.getElementById('workspace-body');
  if (!body) return;

  // A message without a usable artifact list clears the workspace instead of
  // throwing after the previous content has already been removed.
  const list = Array.isArray(artifacts) ? artifacts : [];

  body.innerHTML = '';
  const container = document.createElement('div');
  container.className = 'artifacts-container';

  for (const art of list) {
    const wrapper = document.createElement('div');
    wrapper.className = 'ws-artifact ws-artifact-' + (art.type || 'unknown');
    wrapper.dataset.artifactId = art.id || '';

    // Own-property lookup only: a type such as "toString" or "constructor"
    // must not resolve to an inherited Object.prototype member.
    const renderer = Object.prototype.hasOwnProperty.call(RENDERERS, art.type)
      ? RENDERERS[art.type]
      : null;
    if (renderer) {
      renderer(wrapper, art);
    } else {
      wrapper.innerHTML = '<div class="ws-artifact-fallback">' + esc(JSON.stringify(art.data || {})) + '</div>';
    }

    container.appendChild(wrapper);
  }
  body.appendChild(container);

  // Also set dashboard for S3* audit (flatten actions from artifacts)
  const flatControls = list.flatMap(a => (a.actions || []).map(act => ({type: 'button', ...act})));
  setDashboard(flatControls);
}

// --- Artifact renderers ---

// Artifact type -> renderer(wrapperElement, artifact).
const RENDERERS = {
  entity_detail: renderEntityDetail,
  data_table: renderDataTable,
  document_page: renderDocumentPage,
  action_bar: renderActionBar,
  status: renderStatus,
  machine: renderMachine,
};
|
|
||||||
|
|
||||||
/**
 * Render an `entity_detail` artifact into `el`.
 *
 * Output, in order: optional title and subtitle, an optional list of nested
 * item cards (`data.items`), the entity's own fields (`data.fields`, where a
 * field with an `action` becomes a clickable link) and the artifact's action
 * buttons. Links and buttons are wired through _wireActions().
 */
function renderEntityDetail(el, art) {
  const d = art.data || {};
  const out = [];

  if (d.title) out.push(`<div class="ws-card-title">${esc(d.title)}</div>`);
  if (d.subtitle) out.push(`<div class="ws-card-subtitle">${esc(d.subtitle)}</div>`);

  // List mode: one nested card per item.
  if (d.items && d.items.length) {
    out.push('<div class="ws-list">');
    for (const item of d.items) {
      out.push('<div class="ws-card ws-card-nested">');
      if (item.title) out.push(`<div class="ws-card-title">${esc(item.title)}</div>`);
      if (item.fields) {
        out.push('<div class="ws-card-fields">');
        for (const field of item.fields) {
          out.push(`<div class="ws-card-field"><span class="ws-card-key">${esc(field.label || '')}</span><span class="ws-card-val">${esc(String(field.value ?? ''))}</span></div>`);
        }
        out.push('</div>');
      }
      out.push('</div>');
    }
    out.push('</div>');
  }

  // Single-entity fields; only these support a per-field action link.
  if (d.fields && d.fields.length) {
    out.push('<div class="ws-card-fields">');
    for (const field of d.fields) {
      let valueHtml;
      if (field.action) {
        valueHtml = `<span class="ws-card-link" data-action="${esc(field.action)}">${esc(String(field.value ?? ''))}</span>`;
      } else {
        valueHtml = `<span class="ws-card-val">${esc(String(field.value ?? ''))}</span>`;
      }
      out.push(`<div class="ws-card-field"><span class="ws-card-key">${esc(field.label || '')}</span>${valueHtml}</div>`);
    }
    out.push('</div>');
  }

  // Action buttons.
  if (art.actions && art.actions.length) {
    out.push('<div class="ws-card-actions">');
    for (const act of art.actions) {
      out.push(`<button class="control-btn ws-card-btn" data-action="${esc(act.action || '')}">${esc(act.label || '')}</button>`);
    }
    out.push('</div>');
  }

  el.innerHTML = out.join('');
  _wireActions(el);
}
|
|
||||||
|
|
||||||
/**
 * Render a `data_table` artifact into `el` using DOM nodes (no HTML strings).
 *
 * Rows come from `data.rows`, falling back to `data.data`. Columns come from
 * `data.columns`; when absent they are taken from the keys of the first row,
 * but only if that row is a plain record (array rows carry no column names).
 * Array rows are emitted cell by cell, record rows are projected onto the
 * columns, and any other row value (null, primitive) yields an empty `<tr>`.
 */
function renderDataTable(el, art) {
  const d = art.data || {};
  if (d.title) {
    const title = document.createElement('div');
    title.className = 'ws-artifact-header';
    title.textContent = d.title;
    el.appendChild(title);
  }

  const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);

  // Resolve the row source once so header and body always agree on it.
  const rows = d.rows || d.data || [];
  const firstRow = rows.length ? rows[0] : null;
  const cols = d.columns || (isRecord(firstRow) ? Object.keys(firstRow) : []);

  const table = document.createElement('table');
  table.className = 'control-table';

  if (cols.length) {
    const thead = document.createElement('tr');
    for (const col of cols) {
      const th = document.createElement('th');
      th.textContent = col;
      thead.appendChild(th);
    }
    table.appendChild(thead);
  }

  for (const row of rows) {
    const tr = document.createElement('tr');
    if (Array.isArray(row)) {
      for (const cell of row) {
        const td = document.createElement('td');
        td.textContent = cell;
        tr.appendChild(td);
      }
    } else if (isRecord(row)) {
      // `typeof null` is 'object', so null rows must be excluded explicitly.
      for (const col of cols) {
        const td = document.createElement('td');
        td.textContent = row[col] ?? '';
        tr.appendChild(td);
      }
    }
    table.appendChild(tr);
  }
  el.appendChild(table);
}
|
|
||||||
|
|
||||||
/**
 * Render a `document_page` artifact into `el`.
 *
 * Emits the optional title, then one block per entry of `data.sections`
 * (escaped heading, content passed through renderMarkdown()), then the
 * artifact's action buttons, which are wired through _wireActions().
 */
function renderDocumentPage(el, art) {
  const d = art.data || {};
  const out = [];

  if (d.title) out.push(`<div class="ws-doc-title">${esc(d.title)}</div>`);

  for (const section of d.sections || []) {
    out.push('<div class="ws-doc-section">');
    if (section.heading) {
      out.push(`<div class="ws-doc-heading">${esc(section.heading)}</div>`);
    }
    if (section.content) {
      // Content is markdown: rendered by renderMarkdown(), not escaped here.
      out.push(`<div class="ws-doc-content">${renderMarkdown(section.content)}</div>`);
    }
    out.push('</div>');
  }

  // Actions (e.g. PDF export)
  if (art.actions && art.actions.length) {
    out.push('<div class="ws-card-actions">');
    for (const act of art.actions) {
      out.push(`<button class="control-btn ws-card-btn" data-action="${esc(act.action || '')}">${esc(act.label || '')}</button>`);
    }
    out.push('</div>');
  }

  el.innerHTML = out.join('');
  _wireActions(el);
}
|
|
||||||
|
|
||||||
/**
 * Render an `action_bar` artifact: one button per entry of `art.actions`.
 *
 * Clicking a button sends the entry's `action` together with its `payload`
 * (an empty object when absent) via _sendAction().
 */
function renderActionBar(el, art) {
  const actions = art.actions || [];
  for (const entry of actions) {
    const button = Object.assign(document.createElement('button'), {
      className: 'control-btn',
      textContent: entry.label || '',
    });
    // Read action/payload at click time, not at render time.
    button.onclick = () => _sendAction(entry.action, entry.payload || {});
    el.appendChild(button);
  }
}
|
|
||||||
|
|
||||||
/**
 * Render a `status` artifact into `el`.
 *
 * `data.display_type` selects the layout and is also added to `el` as the
 * class `display-<type>`:
 *   - 'progress': label, bar and percentage, with `data.value` clamped to 0..100;
 *   - 'info': info icon plus label;
 *   - anything else (default 'text'): label plus `data.value` when one is set.
 */
function renderStatus(el, art) {
  const d = art.data || {};
  const dt = d.display_type || 'text';
  // Default the label once so no branch ever hands `undefined` to esc().
  const label = esc(String(d.label ?? ''));
  el.classList.add('display-' + dt);

  if (dt === 'progress') {
    const pct = Math.min(100, Math.max(0, Number(d.value) || 0));
    el.innerHTML = '<span class="cd-label">' + label + '</span>'
      + '<div class="cd-bar"><div class="cd-fill" style="width:' + pct + '%"></div></div>'
      + '<span class="cd-pct">' + pct + '%</span>';
  } else if (dt === 'info') {
    el.innerHTML = '<span class="cd-icon">\u2139</span><span class="cd-label">' + label + '</span>';
  } else {
    // Test for presence, not truthiness: a value of 0 is still a value.
    const hasValue = d.value !== undefined && d.value !== null && d.value !== '';
    el.innerHTML = '<span class="cd-label">' + label + '</span>'
      + (hasValue ? '<span class="cd-value">' + esc(String(d.value)) + '</span>' : '');
  }
}
|
|
||||||
|
|
||||||
/**
 * Render a `machine` artifact into `el`.
 *
 * Emits a header (machine id and current state), one line per entry of
 * `data.content`, the `data.stored_data` pairs as `key=value` chips, and the
 * artifact's action buttons, which are wired through _wireActions().
 */
function renderMachine(el, art) {
  const d = art.data || {};
  const out = [];

  // Header
  out.push(`<div class="ws-machine-header"><span class="ws-machine-name">${esc(d.machine_id || '')}</span><span class="ws-machine-state">${esc(d.current || '')}</span></div>`);

  // Content
  for (const line of d.content || []) {
    out.push(`<div class="ws-machine-content">${esc(line)}</div>`);
  }

  // Stored data
  const storedPairs = Object.entries(d.stored_data || {});
  if (storedPairs.length) {
    out.push('<div class="ws-machine-data">');
    for (const [key, value] of storedPairs) {
      out.push(`<span class="ws-machine-datum">${esc(key)}=${esc(String(value))}</span>`);
    }
    out.push('</div>');
  }

  // Buttons
  if (art.actions && art.actions.length) {
    out.push('<div class="ws-card-actions">');
    for (const act of art.actions) {
      out.push(`<button class="control-btn ws-card-btn" data-action="${esc(act.action || '')}">${esc(act.label || '')}</button>`);
    }
    out.push('</div>');
  }

  el.innerHTML = out.join('');
  _wireActions(el);
}
|
|
||||||
|
|
||||||
// --- Helpers ---
|
|
||||||
|
|
||||||
/**
 * Attach click handlers to every `.ws-card-link` and `.ws-card-btn` inside
 * `el`.
 *
 * Each handler stops propagation and sends the element's `data-action`
 * through _sendAction() with an empty payload.
 */
function _wireActions(el) {
  for (const target of el.querySelectorAll('.ws-card-link, .ws-card-btn')) {
    target.onclick = (event) => {
      event.stopPropagation();
      _sendAction(target.dataset.action, {});
    };
  }
}
|
|
||||||
|
|
||||||
// --- Legacy control rendering (backward compat) ---
|
|
||||||
|
|
||||||
export function dockControls(controls) {
|
export function dockControls(controls) {
|
||||||
setDashboard(controls);
|
setDashboard(controls); // S3*: remember what's rendered
|
||||||
const body = document.getElementById('workspace-body');
|
const body = document.getElementById('workspace-body');
|
||||||
if (!body) return;
|
if (!body) return;
|
||||||
body.innerHTML = '';
|
body.innerHTML = '';
|
||||||
@ -249,7 +21,12 @@ export function dockControls(controls) {
|
|||||||
const btn = document.createElement('button');
|
const btn = document.createElement('button');
|
||||||
btn.className = 'control-btn';
|
btn.className = 'control-btn';
|
||||||
btn.textContent = ctrl.label;
|
btn.textContent = ctrl.label;
|
||||||
btn.onclick = () => _sendAction(ctrl.action, ctrl.payload || ctrl.data || {});
|
btn.onclick = () => {
|
||||||
|
if (_ws && _ws.readyState === 1) {
|
||||||
|
_ws.send(JSON.stringify({ type: 'action', action: ctrl.action, data: ctrl.payload || ctrl.data || {} }));
|
||||||
|
addTrace('runtime', 'action', ctrl.action);
|
||||||
|
}
|
||||||
|
};
|
||||||
container.appendChild(btn);
|
container.appendChild(btn);
|
||||||
} else if (ctrl.type === 'table') {
|
} else if (ctrl.type === 'table') {
|
||||||
const table = document.createElement('table');
|
const table = document.createElement('table');
|
||||||
@ -257,16 +34,22 @@ export function dockControls(controls) {
|
|||||||
if (ctrl.columns) {
|
if (ctrl.columns) {
|
||||||
const thead = document.createElement('tr');
|
const thead = document.createElement('tr');
|
||||||
for (const col of ctrl.columns) {
|
for (const col of ctrl.columns) {
|
||||||
const th = document.createElement('th'); th.textContent = col; thead.appendChild(th);
|
const th = document.createElement('th');
|
||||||
|
th.textContent = col;
|
||||||
|
thead.appendChild(th);
|
||||||
}
|
}
|
||||||
table.appendChild(thead);
|
table.appendChild(thead);
|
||||||
}
|
}
|
||||||
for (const row of (ctrl.data || [])) {
|
for (const row of (ctrl.data || [])) {
|
||||||
const tr = document.createElement('tr');
|
const tr = document.createElement('tr');
|
||||||
if (Array.isArray(row)) {
|
if (Array.isArray(row)) {
|
||||||
for (const cell of row) { const td = document.createElement('td'); td.textContent = cell; tr.appendChild(td); }
|
for (const cell of row) {
|
||||||
|
const td = document.createElement('td'); td.textContent = cell; tr.appendChild(td);
|
||||||
|
}
|
||||||
} else if (typeof row === 'object') {
|
} else if (typeof row === 'object') {
|
||||||
for (const col of (ctrl.columns || Object.keys(row))) { const td = document.createElement('td'); td.textContent = row[col] ?? ''; tr.appendChild(td); }
|
for (const col of (ctrl.columns || Object.keys(row))) {
|
||||||
|
const td = document.createElement('td'); td.textContent = row[col] ?? ''; tr.appendChild(td);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
table.appendChild(tr);
|
table.appendChild(tr);
|
||||||
}
|
}
|
||||||
@ -279,37 +62,21 @@ export function dockControls(controls) {
|
|||||||
} else if (ctrl.type === 'display') {
|
} else if (ctrl.type === 'display') {
|
||||||
const disp = document.createElement('div');
|
const disp = document.createElement('div');
|
||||||
const dt = ctrl.display_type || 'text';
|
const dt = ctrl.display_type || 'text';
|
||||||
disp.className = 'control-display display-' + dt;
|
const style = ctrl.style ? ' display-' + ctrl.style : '';
|
||||||
|
disp.className = 'control-display display-' + dt + style;
|
||||||
if (dt === 'progress') {
|
if (dt === 'progress') {
|
||||||
const pct = Math.min(100, Math.max(0, Number(ctrl.value) || 0));
|
const pct = Math.min(100, Math.max(0, Number(ctrl.value) || 0));
|
||||||
disp.innerHTML = '<span class="cd-label">' + esc(ctrl.label) + '</span><div class="cd-bar"><div class="cd-fill" style="width:' + pct + '%"></div></div><span class="cd-pct">' + pct + '%</span>';
|
disp.innerHTML = '<span class="cd-label">' + esc(ctrl.label) + '</span>'
|
||||||
|
+ '<div class="cd-bar"><div class="cd-fill" style="width:' + pct + '%"></div></div>'
|
||||||
|
+ '<span class="cd-pct">' + pct + '%</span>';
|
||||||
|
} else if (dt === 'status') {
|
||||||
|
disp.innerHTML = '<span class="cd-icon">' + (ctrl.style === 'success' ? '\u2713' : ctrl.style === 'error' ? '\u2717' : '\u2139') + '</span>'
|
||||||
|
+ '<span class="cd-label">' + esc(ctrl.label) + '</span>';
|
||||||
} else {
|
} else {
|
||||||
disp.innerHTML = '<span class="cd-label">' + esc(ctrl.label) + '</span>' + (ctrl.value ? '<span class="cd-value">' + esc(String(ctrl.value)) + '</span>' : '');
|
disp.innerHTML = '<span class="cd-label">' + esc(ctrl.label) + '</span>'
|
||||||
|
+ (ctrl.value ? '<span class="cd-value">' + esc(String(ctrl.value)) + '</span>' : '');
|
||||||
}
|
}
|
||||||
container.appendChild(disp);
|
container.appendChild(disp);
|
||||||
} else if (ctrl.type === 'card') {
|
|
||||||
const card = document.createElement('div');
|
|
||||||
card.className = 'ws-card';
|
|
||||||
let html = '';
|
|
||||||
if (ctrl.title) html += '<div class="ws-card-title">' + esc(ctrl.title) + '</div>';
|
|
||||||
if (ctrl.subtitle) html += '<div class="ws-card-subtitle">' + esc(ctrl.subtitle) + '</div>';
|
|
||||||
if (ctrl.fields && ctrl.fields.length) {
|
|
||||||
html += '<div class="ws-card-fields">';
|
|
||||||
for (const f of ctrl.fields) {
|
|
||||||
html += '<div class="ws-card-field"><span class="ws-card-key">' + esc(f.label || '') + '</span><span class="ws-card-val">' + esc(String(f.value ?? '')) + '</span></div>';
|
|
||||||
}
|
|
||||||
html += '</div>';
|
|
||||||
}
|
|
||||||
if (ctrl.actions && ctrl.actions.length) {
|
|
||||||
html += '<div class="ws-card-actions">';
|
|
||||||
for (const a of ctrl.actions) {
|
|
||||||
html += '<button class="control-btn ws-card-btn" data-action="' + esc(a.action || '') + '">' + esc(a.label || '') + '</button>';
|
|
||||||
}
|
|
||||||
html += '</div>';
|
|
||||||
}
|
|
||||||
card.innerHTML = html;
|
|
||||||
_wireActions(card);
|
|
||||||
container.appendChild(card);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
body.appendChild(container);
|
body.appendChild(container);
|
||||||
|
|||||||
@ -1,12 +1,7 @@
|
|||||||
/** Pipeline graph: Cytoscape visualization + animation. */
|
/** Pipeline graph: Cytoscape visualization + animation. */
|
||||||
|
|
||||||
import { initNodesFromGraph } from './awareness.js';
|
|
||||||
|
|
||||||
let cy = null;
|
let cy = null;
|
||||||
let _dragEnabled = true;
|
let _dragEnabled = true;
|
||||||
// Maps HUD node names → graph node IDs (built from graph definition)
|
|
||||||
// e.g. {"eras_expert": "expert_eras", "pa_v1": "pa", "thinker_v2": "thinker"}
|
|
||||||
let _nodeNameToId = {};
|
|
||||||
let _physicsRunning = false;
|
let _physicsRunning = false;
|
||||||
let _physicsLayout = null;
|
let _physicsLayout = null;
|
||||||
let _colaSpacing = 25;
|
let _colaSpacing = 25;
|
||||||
@ -95,13 +90,6 @@ export async function initGraph() {
|
|||||||
if (resp.ok) {
|
if (resp.ok) {
|
||||||
const graph = await resp.json();
|
const graph = await resp.json();
|
||||||
graphElements = buildGraphElements(graph, mx, cw, mid, row1, row2);
|
graphElements = buildGraphElements(graph, mx, cw, mid, row1, row2);
|
||||||
initNodesFromGraph(graph);
|
|
||||||
// Build HUD name → graph ID mapping: {impl_name: role}
|
|
||||||
_nodeNameToId = {};
|
|
||||||
for (const [role, impl] of Object.entries(graph.nodes || {})) {
|
|
||||||
_nodeNameToId[impl] = role; // "eras_expert" → "expert_eras"
|
|
||||||
_nodeNameToId[role] = role; // "expert_eras" → "expert_eras"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch (e) {}
|
} catch (e) {}
|
||||||
|
|
||||||
@ -161,24 +149,6 @@ export async function initGraph() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Animation queue: batch rapid events, play sequentially ---
const _animQueue = [];
let _animRunning = false;
const ANIM_INTERVAL = 200; // ms between queued animations

/**
 * Queue an animation step. Starts draining immediately when the queue is
 * idle; otherwise the step waits its turn.
 *
 * @param {Function} fn - zero-argument callback that performs the animation.
 */
function _enqueue(fn) {
  _animQueue.push(fn);
  if (!_animRunning) _flushQueue();
}

/**
 * Run the next queued step and schedule the following one ANIM_INTERVAL ms
 * later. Marks the queue idle once it is empty.
 */
function _flushQueue() {
  if (!_animQueue.length) { _animRunning = false; return; }
  _animRunning = true;
  const fn = _animQueue.shift();
  try {
    fn();
  } finally {
    // Always schedule the next tick. Without this, one throwing step would
    // leave _animRunning stuck at true and every later animation would be
    // queued but never played. The error itself still propagates.
    setTimeout(_flushQueue, ANIM_INTERVAL);
  }
}
|
|
||||||
|
|
||||||
function pulseNode(id) {
|
function pulseNode(id) {
|
||||||
if (!cy) return;
|
if (!cy) return;
|
||||||
const node = cy.getElementById(id);
|
const node = cy.getElementById(id);
|
||||||
@ -197,29 +167,29 @@ function flashEdge(sourceId, targetId) {
|
|||||||
|
|
||||||
export function graphAnimate(event, node) {
|
export function graphAnimate(event, node) {
|
||||||
if (!cy) return;
|
if (!cy) return;
|
||||||
// Resolve HUD node name to graph ID (e.g. "eras_expert" → "expert_eras")
|
if (node && cy.getElementById(node).length) pulseNode(node);
|
||||||
const graphId = _nodeNameToId[node] || node;
|
|
||||||
_enqueue(() => {
|
|
||||||
if (graphId && cy.getElementById(graphId).length) pulseNode(graphId);
|
|
||||||
|
|
||||||
switch (event) {
|
switch (event) {
|
||||||
case 'perceived': pulseNode('input'); flashEdge('user', 'input'); break;
|
case 'perceived': pulseNode('input'); flashEdge('user', 'input'); break;
|
||||||
case 'decided':
|
case 'decided':
|
||||||
pulseNode(graphId); flashEdge(graphId, 'output');
|
if (node === 'director_v2' || node === 'director' || node === 'pa_v1') {
|
||||||
|
pulseNode(node); flashEdge(node, 'thinker');
|
||||||
|
} else {
|
||||||
|
pulseNode(node || 'thinker'); flashEdge('thinker', 'output');
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case 'routed': pulseNode(_nodeNameToId['pa_v1'] || 'pa'); break;
|
case 'routed': pulseNode('pa'); break;
|
||||||
case 'reflex_path': pulseNode('input'); flashEdge('input', 'output'); break;
|
case 'reflex_path': pulseNode('input'); flashEdge('input', 'output'); break;
|
||||||
case 'streaming': if (graphId === 'output') pulseNode('output'); break;
|
case 'streaming': if (node === 'output') pulseNode('output'); break;
|
||||||
case 'controls': case 'machine_created': case 'machine_transition':
|
case 'controls': case 'machine_created': case 'machine_transition':
|
||||||
pulseNode('ui'); break;
|
pulseNode('ui'); break;
|
||||||
case 'updated': pulseNode('memorizer'); flashEdge('output', 'memorizer'); break;
|
case 'updated': pulseNode('memorizer'); flashEdge('output', 'memorizer'); break;
|
||||||
case 'tool_call': pulseNode(graphId); break;
|
case 'tool_call': pulseNode(node || 'thinker'); break;
|
||||||
case 'tool_result': pulseNode(graphId); break;
|
case 'tool_result':
|
||||||
case 'thinking': pulseNode(graphId); break;
|
if (cy.getElementById('interpreter').length) pulseNode('interpreter'); break;
|
||||||
case 'planned': pulseNode(graphId); break;
|
case 'thinking': if (node) pulseNode(node); break;
|
||||||
case 'tick': pulseNode('sensor'); break;
|
case 'tick': pulseNode('sensor'); break;
|
||||||
}
|
}
|
||||||
}); // end _enqueue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function startPhysics() {
|
export function startPhysics() {
|
||||||
|
|||||||
@ -4,7 +4,6 @@ import { initAuth, authToken, startLogin } from './auth.js';
|
|||||||
import { initTrace, addTrace, clearTrace } from './trace.js';
|
import { initTrace, addTrace, clearTrace } from './trace.js';
|
||||||
import { initChat, clearChat } from './chat.js';
|
import { initChat, clearChat } from './chat.js';
|
||||||
import { clearDashboard } from './dashboard.js';
|
import { clearDashboard } from './dashboard.js';
|
||||||
import { clearNodes } from './awareness.js';
|
|
||||||
import { initGraph } from './graph.js';
|
import { initGraph } from './graph.js';
|
||||||
import { connect } from './ws.js';
|
import { connect } from './ws.js';
|
||||||
|
|
||||||
@ -13,13 +12,10 @@ window.addEventListener('load', async () => {
|
|||||||
initTrace();
|
initTrace();
|
||||||
initChat();
|
initChat();
|
||||||
await initGraph();
|
await initGraph();
|
||||||
await initAuth(() => {
|
await initAuth(() => connect());
|
||||||
connect();
|
|
||||||
loadGraphSwitcher();
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// Clear session
|
// Clear session button
|
||||||
window.clearSession = async () => {
|
window.clearSession = async () => {
|
||||||
try {
|
try {
|
||||||
const headers = { 'Content-Type': 'application/json' };
|
const headers = { 'Content-Type': 'application/json' };
|
||||||
@ -28,63 +24,11 @@ window.clearSession = async () => {
|
|||||||
clearChat();
|
clearChat();
|
||||||
clearTrace();
|
clearTrace();
|
||||||
clearDashboard();
|
clearDashboard();
|
||||||
clearNodes();
|
|
||||||
addTrace('runtime', 'cleared', 'session reset');
|
addTrace('runtime', 'cleared', 'session reset');
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
addTrace('runtime', 'error', 'clear failed: ' + e);
|
addTrace('runtime', 'error', 'clear failed: ' + e);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Graph switcher — loads available graphs and shows buttons in top bar
|
// Login button
|
||||||
/**
 * Populate #graph-switcher with one button per graph from /api/graph/list.
 *
 * The graph reported by /api/graph/active gets the extra `active` class.
 * Clicking a button calls window.switchGraph() with that graph's name.
 * Buttons are built with DOM APIs, so graph names and descriptions are never
 * parsed as HTML or as inline script.
 */
async function loadGraphSwitcher() {
  const container = document.getElementById('graph-switcher');
  if (!container) { console.error('[main] no #graph-switcher'); return; }
  try {
    const headers = {};
    if (authToken) headers['Authorization'] = 'Bearer ' + authToken;
    const r = await fetch('/api/graph/list', { headers });
    if (!r.ok) { console.error('[main] graph/list failed:', r.status); return; }
    const data = await r.json();
    // Accept either a bare array or { graphs: [...] }; anything else -> none.
    const listed = Array.isArray(data) ? data : (data && data.graphs);
    const graphs = Array.isArray(listed) ? listed : [];
    console.log('[main] graphs:', graphs.length);

    // Get current active graph
    let activeGraph = '';
    try {
      const ar = await fetch('/api/graph/active', { headers });
      if (ar.ok) {
        const ag = await ar.json();
        activeGraph = ag.name || '';
      }
    } catch (e) {
      // Best effort: without this answer no button is highlighted.
    }

    container.innerHTML = '';
    for (const g of graphs) {
      const btn = document.createElement('button');
      btn.className = 'btn-graph' + (g.name === activeGraph ? ' active' : '');
      btn.title = g.description || '';
      btn.textContent = g.name;
      btn.onclick = () => window.switchGraph(g.name);
      container.appendChild(btn);
    }
  } catch (e) {
    console.error('[main] graph switcher failed:', e);
  }
}
|
|
||||||
|
|
||||||
/**
 * Ask the server to switch to graph `name`, then reset the UI and reload the
 * graph view and the switcher buttons.
 *
 * The UI is only reset after the server accepted the switch; a network error
 * or a non-2xx response is reported as a 'switch failed' trace entry and
 * leaves the current chat, trace, dashboard and node cards untouched.
 */
window.switchGraph = async (name) => {
  try {
    const headers = { 'Content-Type': 'application/json' };
    if (authToken) headers['Authorization'] = 'Bearer ' + authToken;
    const resp = await fetch('/api/graph/switch', {
      method: 'POST', headers,
      body: JSON.stringify({ name }),
    });
    // fetch() only rejects on network failure; HTTP errors must be checked.
    if (!resp.ok) throw new Error('HTTP ' + resp.status);
    addTrace('runtime', 'graph_switch', name);
    clearChat();
    clearTrace();
    clearDashboard();
    clearNodes();
    addTrace('runtime', 'switched', `graph: ${name}`);
    await initGraph();
    loadGraphSwitcher();
  } catch (e) {
    addTrace('runtime', 'error', 'switch failed: ' + e);
  }
};
|
|
||||||
|
|
||||||
// Login
|
|
||||||
window.startLogin = startLogin;
|
window.startLogin = startLogin;
|
||||||
|
|||||||
@ -2,10 +2,10 @@
|
|||||||
|
|
||||||
import { authToken, isAuthFailed, setAuthFailed, showLogin } from './auth.js';
|
import { authToken, isAuthFailed, setAuthFailed, showLogin } from './auth.js';
|
||||||
import { addTrace } from './trace.js';
|
import { addTrace } from './trace.js';
|
||||||
import { addMsg, handleDelta, handleDone, setWs as setChatWs } from './chat.js';
|
import { handleDelta, handleDone, setWs as setChatWs } from './chat.js';
|
||||||
import { dockControls, dockArtifacts, setWs as setDashWs } from './dashboard.js';
|
import { dockControls, setWs as setDashWs } from './dashboard.js';
|
||||||
import { graphAnimate } from './graph.js';
|
import { graphAnimate } from './graph.js';
|
||||||
import { updateMeter, updateNodeFromHud, updateAwarenessState, updateAwarenessSensors } from './awareness.js';
|
import { updateMeter, updateAwarenessState, updateAwarenessSensors } from './awareness.js';
|
||||||
import { updateTestStatus } from './tests.js';
|
import { updateTestStatus } from './tests.js';
|
||||||
import { truncate, esc } from './util.js';
|
import { truncate, esc } from './util.js';
|
||||||
|
|
||||||
@ -30,14 +30,12 @@ export function connect() {
|
|||||||
setChatWs(ws);
|
setChatWs(ws);
|
||||||
setDashWs(ws);
|
setDashWs(ws);
|
||||||
connectDebugSockets();
|
connectDebugSockets();
|
||||||
restoreHistory();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
ws.onerror = () => {};
|
ws.onerror = () => {};
|
||||||
|
|
||||||
ws.onclose = (e) => {
|
ws.onclose = (e) => {
|
||||||
// 4001 = explicit auth rejection from server
|
if (e.code === 4001 || e.code === 1006) {
|
||||||
if (e.code === 4001) {
|
|
||||||
setAuthFailed(true);
|
setAuthFailed(true);
|
||||||
localStorage.removeItem('cog_token');
|
localStorage.removeItem('cog_token');
|
||||||
localStorage.removeItem('cog_access_token');
|
localStorage.removeItem('cog_access_token');
|
||||||
@ -46,10 +44,9 @@ export function connect() {
|
|||||||
showLogin();
|
showLogin();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// 1006 = abnormal close (deploy, network), just reconnect
|
document.getElementById('status').textContent = 'disconnected';
|
||||||
document.getElementById('status').textContent = 'reconnecting...';
|
document.getElementById('status').style.color = '#666';
|
||||||
document.getElementById('status').style.color = '#f59e0b';
|
addTrace('runtime', 'disconnected', 'ws closed');
|
||||||
addTrace('runtime', 'disconnected', `code ${e.code}, reconnecting...`);
|
|
||||||
setTimeout(connect, 2000);
|
setTimeout(connect, 2000);
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -61,8 +58,6 @@ export function connect() {
|
|||||||
handleDelta(data.content);
|
handleDelta(data.content);
|
||||||
} else if (data.type === 'done') {
|
} else if (data.type === 'done') {
|
||||||
handleDone();
|
handleDone();
|
||||||
} else if (data.type === 'artifacts') {
|
|
||||||
dockArtifacts(data.artifacts);
|
|
||||||
} else if (data.type === 'controls') {
|
} else if (data.type === 'controls') {
|
||||||
dockControls(data.controls);
|
dockControls(data.controls);
|
||||||
} else if (data.type === 'cleared') {
|
} else if (data.type === 'cleared') {
|
||||||
@ -71,31 +66,6 @@ export function connect() {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function restoreHistory() {
|
|
||||||
try {
|
|
||||||
const headers = {};
|
|
||||||
if (authToken) headers['Authorization'] = 'Bearer ' + authToken;
|
|
||||||
const r = await fetch('/api/history?last=20', { headers });
|
|
||||||
if (!r.ok) return;
|
|
||||||
const data = await r.json();
|
|
||||||
const messages = data.messages || [];
|
|
||||||
if (!messages.length) return;
|
|
||||||
// Only restore if chat is empty (fresh load)
|
|
||||||
if (document.getElementById('messages').children.length > 0) return;
|
|
||||||
for (const msg of messages) {
|
|
||||||
const el = addMsg(msg.role, '');
|
|
||||||
if (msg.role === 'assistant') {
|
|
||||||
// Render as markdown
|
|
||||||
const { renderMarkdown } = await import('./util.js');
|
|
||||||
el.innerHTML = renderMarkdown(msg.content || '');
|
|
||||||
} else {
|
|
||||||
el.textContent = msg.content || '';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
addTrace('runtime', 'restored', `${messages.length} messages`);
|
|
||||||
} catch (e) {}
|
|
||||||
}
|
|
||||||
|
|
||||||
function connectDebugSockets() {
|
function connectDebugSockets() {
|
||||||
const proto = location.protocol === 'https:' ? 'wss:' : 'ws:';
|
const proto = location.protocol === 'https:' ? 'wss:' : 'ws:';
|
||||||
const base = proto + '//' + location.host;
|
const base = proto + '//' + location.host;
|
||||||
@ -153,7 +123,6 @@ function handleHud(data) {
|
|||||||
const event = data.event || '';
|
const event = data.event || '';
|
||||||
|
|
||||||
graphAnimate(event, node);
|
graphAnimate(event, node);
|
||||||
updateNodeFromHud(node, event, data);
|
|
||||||
|
|
||||||
if (event === 'context') {
|
if (event === 'context') {
|
||||||
const count = (data.messages || []).length;
|
const count = (data.messages || []).length;
|
||||||
|
|||||||
@ -10,16 +10,10 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0;
|
|||||||
#test-status .ts-pass { color: #22c55e; }
|
#test-status .ts-pass { color: #22c55e; }
|
||||||
#test-status .ts-fail { color: #ef4444; }
|
#test-status .ts-fail { color: #ef4444; }
|
||||||
@keyframes pulse-text { 0%,100% { opacity: 1; } 50% { opacity: 0.5; } }
|
@keyframes pulse-text { 0%,100% { opacity: 1; } 50% { opacity: 0.5; } }
|
||||||
.btn-top { padding: 0.2rem 0.6rem; font-size: 0.7rem; background: #333; }
|
|
||||||
.btn-top:hover { background: #ef4444; }
|
|
||||||
#graph-switcher { display: flex; gap: 3px; }
|
|
||||||
.btn-graph { padding: 0.2rem 0.5rem; font-size: 0.65rem; font-family: monospace; background: #1a1a1a; color: #888; border: 1px solid #333; border-radius: 3px; cursor: pointer; }
|
|
||||||
.btn-graph:hover { color: #fff; border-color: #2563eb; }
|
|
||||||
.btn-graph.active { color: #22c55e; border-color: #22c55e; background: #0a1e14; }
|
|
||||||
|
|
||||||
/* === Two-row layout === */
|
/* === Two-row layout === */
|
||||||
/* Middle row: workspace | node detail | graph */
|
/* Middle row: workspace | node detail | graph */
|
||||||
#middle-row { display: grid; grid-template-columns: 1fr 300px 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; }
|
#middle-row { display: grid; grid-template-columns: 1fr 200px 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; }
|
||||||
/* Bottom row: chat | awareness | trace */
|
/* Bottom row: chat | awareness | trace */
|
||||||
#bottom-row { display: grid; grid-template-columns: 1fr 1fr 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; }
|
#bottom-row { display: grid; grid-template-columns: 1fr 1fr 2fr; gap: 1px; background: #222; flex: 1; min-height: 0; }
|
||||||
|
|
||||||
@ -42,19 +36,12 @@ body { font-family: system-ui, sans-serif; background: #0a0a0a; color: #e0e0e0;
|
|||||||
|
|
||||||
/* Node detail / metrics */
|
/* Node detail / metrics */
|
||||||
.detail-panel { display: flex; flex-direction: column; }
|
.detail-panel { display: flex; flex-direction: column; }
|
||||||
#node-metrics { flex: 1; overflow-y: auto; padding: 0.3rem; display: flex; flex-direction: column; gap: 2px; }
|
#node-metrics { flex: 1; overflow-y: auto; padding: 0.3rem; display: flex; flex-direction: column; gap: 1px; }
|
||||||
.node-card { background: #111; border-radius: 3px; padding: 0.25rem 0.4rem; border-left: 2px solid #333; }
|
.node-meter { display: flex; align-items: center; gap: 0.3rem; padding: 0.2rem 0.4rem; background: #111; border-radius: 2px; }
|
||||||
.node-card.nm-active { border-left-color: #f59e0b; background: #1a1408; }
|
.nm-label { font-size: 0.6rem; font-weight: 700; text-transform: uppercase; letter-spacing: 0.03em; min-width: 3.5rem; color: #888; }
|
||||||
.node-card.nm-streaming { border-left-color: #22c55e; background: #0a1e14; }
|
.nm-bar { flex: 1; height: 5px; background: #1a1a1a; border-radius: 3px; overflow: hidden; }
|
||||||
.nc-header { display: flex; align-items: center; gap: 0.3rem; }
|
.nm-fill { height: 100%; width: 0%; border-radius: 3px; transition: width 0.3s; background: #333; }
|
||||||
.nc-name { font-size: 0.65rem; font-weight: 700; text-transform: uppercase; color: #e0e0e0; min-width: 3rem; }
|
.nm-text { font-size: 0.55rem; color: #555; min-width: 3rem; text-align: right; font-family: monospace; }
|
||||||
.nc-model { font-size: 0.55rem; color: #666; font-family: monospace; }
|
|
||||||
.nc-tokens { font-size: 0.55rem; color: #555; font-family: monospace; margin-left: auto; }
|
|
||||||
.nc-bar { height: 3px; background: #1a1a1a; border-radius: 2px; overflow: hidden; margin: 2px 0; }
|
|
||||||
.nc-fill { height: 100%; border-radius: 2px; background: #333; transition: width 0.3s; }
|
|
||||||
.nc-status { display: flex; gap: 0.3rem; align-items: baseline; }
|
|
||||||
.nc-event { font-size: 0.55rem; color: #888; font-family: monospace; }
|
|
||||||
.nc-detail { font-size: 0.55rem; color: #666; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
|
||||||
|
|
||||||
/* Graph panel */
|
/* Graph panel */
|
||||||
.graph-panel { display: flex; flex-direction: column; }
|
.graph-panel { display: flex; flex-direction: column; }
|
||||||
@ -127,10 +114,6 @@ button:hover { background: #1d4ed8; }
|
|||||||
.aw-row { display: flex; justify-content: space-between; padding: 0.08rem 0; }
|
.aw-row { display: flex; justify-content: space-between; padding: 0.08rem 0; }
|
||||||
.aw-key { color: #888; font-size: 0.65rem; }
|
.aw-key { color: #888; font-size: 0.65rem; }
|
||||||
.aw-val { color: #e0e0e0; font-size: 0.7rem; font-weight: 500; }
|
.aw-val { color: #e0e0e0; font-size: 0.7rem; font-weight: 500; }
|
||||||
.aw-exp-conv { color: #4caf50; }
|
|
||||||
.aw-exp-deleg { color: #ff9800; }
|
|
||||||
.aw-exp-wait { color: #42a5f5; }
|
|
||||||
.aw-exp-obs { color: #9e9e9e; }
|
|
||||||
|
|
||||||
/* UI Controls (workspace) */
|
/* UI Controls (workspace) */
|
||||||
.controls-container { padding: 0.3rem 0; display: flex; flex-wrap: wrap; gap: 0.3rem; align-items: flex-start; }
|
.controls-container { padding: 0.3rem 0; display: flex; flex-wrap: wrap; gap: 0.3rem; align-items: flex-start; }
|
||||||
@ -147,51 +130,6 @@ button:hover { background: #1d4ed8; }
|
|||||||
.cd-label { color: #888; }
|
.cd-label { color: #888; }
|
||||||
.cd-value { color: #e0e0e0; margin-left: 0.5rem; }
|
.cd-value { color: #e0e0e0; margin-left: 0.5rem; }
|
||||||
|
|
||||||
/* Workspace cards */
|
|
||||||
.ws-card { background: #111; border: 1px solid #222; border-radius: 0.4rem; padding: 0.5rem 0.6rem; width: 100%; }
|
|
||||||
.ws-card-clickable { cursor: pointer; }
|
|
||||||
.ws-card-clickable:hover { border-color: #2563eb; background: #0a1628; }
|
|
||||||
.ws-card-title { font-size: 0.85rem; font-weight: 700; color: #e0e0e0; }
|
|
||||||
.ws-card-subtitle { font-size: 0.7rem; color: #888; margin-top: 0.1rem; }
|
|
||||||
.ws-card-fields { margin-top: 0.4rem; display: flex; flex-direction: column; gap: 0.15rem; }
|
|
||||||
.ws-card-field { display: flex; justify-content: space-between; font-size: 0.75rem; padding: 0.1rem 0; }
|
|
||||||
.ws-card-key { color: #888; }
|
|
||||||
.ws-card-val { color: #e0e0e0; font-weight: 500; }
|
|
||||||
.ws-card-link { color: #60a5fa; cursor: pointer; font-weight: 500; }
|
|
||||||
.ws-card-link:hover { text-decoration: underline; }
|
|
||||||
.ws-card-actions { margin-top: 0.4rem; display: flex; gap: 0.3rem; flex-wrap: wrap; }
|
|
||||||
.ws-card-btn { font-size: 0.7rem; padding: 0.2rem 0.5rem; }
|
|
||||||
.ws-list { display: flex; flex-direction: column; gap: 0.3rem; width: 100%; }
|
|
||||||
.ws-list-title { font-size: 0.75rem; font-weight: 700; color: #888; text-transform: uppercase; letter-spacing: 0.03em; margin-bottom: 0.2rem; }
|
|
||||||
.ws-card-nested { margin: 0; border-color: #1a1a2e; }
|
|
||||||
|
|
||||||
/* Artifact system */
|
|
||||||
.artifacts-container { padding: 0.3rem 0; display: flex; flex-direction: column; gap: 0.4rem; }
|
|
||||||
.ws-artifact { width: 100%; }
|
|
||||||
.ws-artifact-entity { background: #111; border: 1px solid #222; border-radius: 0.4rem; padding: 0.5rem 0.6rem; }
|
|
||||||
.ws-artifact-data_table { }
|
|
||||||
.ws-artifact-action_bar { display: flex; flex-wrap: wrap; gap: 0.3rem; }
|
|
||||||
.ws-artifact-status { padding: 0.25rem 0.4rem; font-size: 0.75rem; display: flex; align-items: center; gap: 0.4rem; }
|
|
||||||
.ws-artifact-header { font-size: 0.75rem; font-weight: 600; color: #888; margin-bottom: 0.2rem; }
|
|
||||||
.ws-artifact-fallback { font-size: 0.7rem; color: #666; font-family: monospace; white-space: pre-wrap; }
|
|
||||||
|
|
||||||
/* Document page artifact */
|
|
||||||
.ws-artifact-document_page { background: #111; border: 1px solid #222; border-radius: 0.4rem; padding: 0.8rem 1rem; }
|
|
||||||
.ws-doc-title { font-size: 1rem; font-weight: 700; color: #e0e0e0; margin-bottom: 0.6rem; border-bottom: 1px solid #333; padding-bottom: 0.4rem; }
|
|
||||||
.ws-doc-section { margin-bottom: 0.5rem; }
|
|
||||||
.ws-doc-heading { font-size: 0.8rem; font-weight: 700; color: #a78bfa; margin-bottom: 0.2rem; }
|
|
||||||
.ws-doc-content { font-size: 0.75rem; color: #ccc; line-height: 1.5; }
|
|
||||||
.ws-doc-content ul, .ws-doc-content ol { margin: 0.2rem 0; padding-left: 1.2rem; }
|
|
||||||
|
|
||||||
/* Machine artifact */
|
|
||||||
.ws-artifact-machine { background: #111; border: 1px solid #2563eb33; border-radius: 0.4rem; padding: 0.5rem 0.6rem; }
|
|
||||||
.ws-machine-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.3rem; }
|
|
||||||
.ws-machine-name { font-size: 0.75rem; font-weight: 600; color: #a78bfa; }
|
|
||||||
.ws-machine-state { font-size: 0.7rem; color: #60a5fa; background: #1e3a5f; padding: 0.1rem 0.4rem; border-radius: 0.2rem; }
|
|
||||||
.ws-machine-content { font-size: 0.75rem; color: #ccc; padding: 0.1rem 0; }
|
|
||||||
.ws-machine-data { display: flex; flex-wrap: wrap; gap: 0.3rem; margin-top: 0.2rem; }
|
|
||||||
.ws-machine-datum { font-size: 0.65rem; color: #888; background: #1a1a2e; padding: 0.1rem 0.3rem; border-radius: 0.2rem; }
|
|
||||||
|
|
||||||
/* Login overlay */
|
/* Login overlay */
|
||||||
#login-overlay { position: fixed; inset: 0; background: rgba(0,0,0,0.85); display: flex; align-items: center; justify-content: center; z-index: 1000; }
|
#login-overlay { position: fixed; inset: 0; background: rgba(0,0,0,0.85); display: flex; align-items: center; justify-content: center; z-index: 1000; }
|
||||||
.login-card { background: #1a1a1a; padding: 2rem; border-radius: 0.6rem; text-align: center; }
|
.login-card { background: #1a1a1a; padding: 2rem; border-radius: 0.6rem; text-align: center; }
|
||||||
|
|||||||
@ -1,33 +0,0 @@
|
|||||||
# Artifact System
|
|
||||||
|
|
||||||
Tests that the artifact rendering pipeline works end-to-end.
|
|
||||||
Expert produces data → UINode converts to artifacts → frontend renders.
|
|
||||||
|
|
||||||
## Setup
|
|
||||||
- clear history
|
|
||||||
|
|
||||||
## Steps
|
|
||||||
|
|
||||||
### 1. Query produces data_table artifact
|
|
||||||
- send: show me 3 customers in a table
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: length > 10
|
|
||||||
|
|
||||||
### 2. Entity detail via card
|
|
||||||
- send: show me details for customer 1
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: length > 10
|
|
||||||
|
|
||||||
### 3. Action bar via buttons
|
|
||||||
- send: create two buttons on my dashboard: Refresh and Export
|
|
||||||
- expect_actions: length >= 2
|
|
||||||
- expect_actions: any action contains "refresh" or "Refresh"
|
|
||||||
|
|
||||||
### 4. Machine artifact
|
|
||||||
- send: create a machine called "flow" with initial state "ready" and a state called "done"
|
|
||||||
- expect_trace: has machine_created
|
|
||||||
|
|
||||||
### 5. Query after buttons survive
|
|
||||||
- send: how many customers are there?
|
|
||||||
- expect_response: length > 5
|
|
||||||
- expect_actions: any action contains "refresh" or "Refresh"
|
|
||||||
@ -1,46 +0,0 @@
|
|||||||
# Domain Context
|
|
||||||
|
|
||||||
Tests that the expert understands the Eras business domain:
|
|
||||||
Heizkostenabrechnung, Kunde→Objekt→Nutzeinheit→Geraet hierarchy,
|
|
||||||
and can formulate correct JOINs without guessing column names.
|
|
||||||
|
|
||||||
## Setup
|
|
||||||
- clear history
|
|
||||||
|
|
||||||
## Steps
|
|
||||||
|
|
||||||
### 1. Expert knows the hierarchy
|
|
||||||
- send: wie viele Objekte haben Kunden im Durchschnitt?
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054" or "doesn't exist"
|
|
||||||
- expect_response: length > 20
|
|
||||||
|
|
||||||
### 2. Expert can JOIN kunden and objekte
|
|
||||||
- send: zeig mir die Top 5 Kunden mit den meisten Objekten
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Error" or "error" or "Unknown column"
|
|
||||||
- expect_response: length > 20
|
|
||||||
|
|
||||||
### 3. Expert understands Nutzeinheiten belong to Objekte
|
|
||||||
- send: how many Nutzeinheiten does the system have total?
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Error" or "error" or "Unknown column"
|
|
||||||
- expect_response: length > 10
|
|
||||||
|
|
||||||
### 4. Expert understands Geraete belong to Nutzeinheiten
|
|
||||||
- send: which Objekt has the most Geraete?
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Error" or "error" or "Unknown column"
|
|
||||||
- expect_response: length > 20
|
|
||||||
|
|
||||||
### 5. Multi-hop query through hierarchy
|
|
||||||
- send: zeig alle Nutzer in Objekten von Kunde mit Jaeger im Namen
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054" or "doesn't exist"
|
|
||||||
- expect_response: contains "Jaeger" or "jaeger"
|
|
||||||
|
|
||||||
### 6. PA formulates good job descriptions
|
|
||||||
- send: gib mir eine Uebersicht ueber Kunde 2
|
|
||||||
- expect_trace: has routed
|
|
||||||
- expect_response: length > 20
|
|
||||||
- expect_response: not contains "clarify" or "specify" or "what kind"
|
|
||||||
@ -1,64 +0,0 @@
|
|||||||
# Eras Domain Mastery
|
|
||||||
|
|
||||||
Tests that the expert knows the schema cold — no DESCRIBE at runtime, no SQL errors,
|
|
||||||
domain-correct responses. The expert is a Heizkostenabrechnung specialist, not a SQL explorer.
|
|
||||||
|
|
||||||
## Setup
|
|
||||||
- clear history
|
|
||||||
|
|
||||||
## Steps
|
|
||||||
|
|
||||||
### 1. Customer overview
|
|
||||||
- send: zeig mir die ersten 5 Kunden
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
|
|
||||||
- expect_response: length > 20
|
|
||||||
|
|
||||||
### 2. Objekte per Kunde (junction table)
|
|
||||||
- send: welcher Kunde hat die meisten Objekte?
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
|
|
||||||
- expect_response: length > 20
|
|
||||||
|
|
||||||
### 3. Nutzeinheiten in an Objekt
|
|
||||||
- send: wie viele Nutzeinheiten hat Objekt 4?
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
|
|
||||||
- expect_response: length > 5
|
|
||||||
|
|
||||||
### 4. Geraete count per Objekt
|
|
||||||
- send: welches Objekt hat die meisten Geraete?
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
|
|
||||||
- expect_response: length > 20
|
|
||||||
|
|
||||||
### 5. Full hierarchy traversal (4 tables)
|
|
||||||
- send: zeig mir alle Nutzer von Kunde 2
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
|
|
||||||
- expect_response: length > 10
|
|
||||||
|
|
||||||
### 6. Address lookup via junction
|
|
||||||
- send: was ist die Adresse von Objekt 4?
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
|
|
||||||
- expect_response: length > 10
|
|
||||||
|
|
||||||
### 7. Verbrauchsdaten query
|
|
||||||
- send: zeig mir die letzten 5 Verbrauchswerte von Geraet 100
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
|
|
||||||
- expect_response: length > 10
|
|
||||||
|
|
||||||
### 8. Domain language response (not SQL dump)
|
|
||||||
- send: gib mir eine Zusammenfassung von Kunde 103
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "SELECT" or "JOIN" or "FROM"
|
|
||||||
- expect_response: length > 30
|
|
||||||
|
|
||||||
### 9. Expert does NOT describe at runtime
|
|
||||||
- send: wie viele Geraete hat Kunde 63?
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
|
|
||||||
- expect_response: not contains "DESCRIBE" or "describe"
|
|
||||||
- expect_response: length > 5
|
|
||||||
@ -1,50 +0,0 @@
|
|||||||
# Expectation Tracking
|
|
||||||
|
|
||||||
Tests that memorizer tracks user_expectation and it influences PA/Output behavior.
|
|
||||||
Exercises machine features (update_machine, transition_machine) alongside expectation transitions.
|
|
||||||
|
|
||||||
## Setup
|
|
||||||
- clear history
|
|
||||||
|
|
||||||
## Steps
|
|
||||||
|
|
||||||
### 1. Greeting sets conversational
|
|
||||||
- send: hi there!
|
|
||||||
- expect_response: length > 2
|
|
||||||
- expect_state: user_expectation is "conversational"
|
|
||||||
|
|
||||||
### 2. Create a wizard machine
|
|
||||||
- send: create a machine called "project" with states: planning (initial) and executing
|
|
||||||
- expect_trace: has machine_created
|
|
||||||
|
|
||||||
### 3. Delegate a task
|
|
||||||
- send: build me a summary report of the top 5 customers by device count
|
|
||||||
- expect_response: length > 20
|
|
||||||
- expect_state: user_expectation is "delegated" or "observing"
|
|
||||||
|
|
||||||
### 4. Ask about wizard (status check stays in flow)
|
|
||||||
- send: what state is my project machine in?
|
|
||||||
- expect_response: contains "planning" or "project"
|
|
||||||
- expect_state: user_expectation is "conversational" or "delegated"
|
|
||||||
|
|
||||||
### 5. Store data on machine
|
|
||||||
- send: use update_machine to store status=in_progress on the project machine
|
|
||||||
- expect_response: length > 5
|
|
||||||
|
|
||||||
### 6. Transition machine
|
|
||||||
- send: use transition_machine to move project to executing state
|
|
||||||
- expect_response: length > 5
|
|
||||||
|
|
||||||
### 7. Verify machine state and data
|
|
||||||
- send: what is the current state and data of the project machine?
|
|
||||||
- expect_response: contains "executing" or "in_progress"
|
|
||||||
|
|
||||||
### 8. Short nudge triggers waiting_input
|
|
||||||
- send: und?
|
|
||||||
- expect_response: length > 5
|
|
||||||
- expect_state: user_expectation is "waiting_input" or "conversational"
|
|
||||||
|
|
||||||
### 9. Quick thanks (observing)
|
|
||||||
- send: ok danke
|
|
||||||
- expect_response: length > 0
|
|
||||||
- expect_state: user_expectation is "observing" or "observational" or "conversational"
|
|
||||||
@ -1,33 +0,0 @@
|
|||||||
# Expert Recovery
|
|
||||||
|
|
||||||
Tests that the expert recovers from SQL errors by retrying with corrected queries,
|
|
||||||
not by reporting the error and stopping.
|
|
||||||
|
|
||||||
## Setup
|
|
||||||
- clear history
|
|
||||||
|
|
||||||
## Steps
|
|
||||||
|
|
||||||
### 1. Expert recovers from column error silently
|
|
||||||
- send: zeig mir alle Geraete von Objekt 4 mit Bezeichnung und Einbaudatum
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
|
|
||||||
- expect_response: length > 20
|
|
||||||
|
|
||||||
### 2. Multi-table query with potential errors
|
|
||||||
- send: zeig mir alle Nutzer und ihre Geraete fuer Kunde 2
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054" or "error" or "Error"
|
|
||||||
- expect_response: length > 20
|
|
||||||
|
|
||||||
### 3. Expert does not give up on first failure
|
|
||||||
- send: zeig mir Verbrauchswerte fuer Geraet 50 im letzten Monat
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "I need assistance" or "developer" or "schema issue"
|
|
||||||
- expect_response: length > 10
|
|
||||||
|
|
||||||
### 4. Expert retries on unmapped table (abrechnungsinformationen)
|
|
||||||
- send: zeig mir die letzten 3 Abrechnungsinformationen
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_response: not contains "Unknown column" or "1054"
|
|
||||||
- expect_response: length > 10
|
|
||||||
@ -1,41 +0,0 @@
|
|||||||
# Machine State → PA Context
|
|
||||||
|
|
||||||
Tests that PA reads machine state when routing, and experts can write back to machines.
|
|
||||||
Validates: enriched machine summary, update_machine, transition_machine.
|
|
||||||
|
|
||||||
## Setup
|
|
||||||
- clear history
|
|
||||||
|
|
||||||
## Steps
|
|
||||||
|
|
||||||
### 1. Create a machine
|
|
||||||
- send: create a navigation machine called "wizard" with initial state "start" and a second state called "details"
|
|
||||||
- expect_trace: has machine_created
|
|
||||||
|
|
||||||
### 2. PA sees machine in context
|
|
||||||
- send: what machines are active on my dashboard?
|
|
||||||
- expect_response: contains "wizard" or "start"
|
|
||||||
|
|
||||||
### 3. Expert stores data on machine
|
|
||||||
- send: use update_machine to store region=Bayern on the wizard machine
|
|
||||||
- expect_response: contains "Bayern" or "region" or "stored" or "updated"
|
|
||||||
|
|
||||||
### 4. PA sees stored data
|
|
||||||
- send: what data is stored in my wizard machine?
|
|
||||||
- expect_response: contains "Bayern" or "region"
|
|
||||||
|
|
||||||
### 5. Expert transitions machine to details
|
|
||||||
- send: use transition_machine to move wizard to details state
|
|
||||||
- expect_response: length > 5
|
|
||||||
|
|
||||||
### 6. PA sees updated state
|
|
||||||
- send: what state is the wizard in now?
|
|
||||||
- expect_response: contains "details"
|
|
||||||
|
|
||||||
### 7. Expert transitions back
|
|
||||||
- send: use transition_machine to move wizard back to start
|
|
||||||
- expect_response: length > 5
|
|
||||||
|
|
||||||
### 8. Final state check
|
|
||||||
- send: tell me the current wizard state and stored data
|
|
||||||
- expect_response: contains "start"
|
|
||||||
@ -1,19 +0,0 @@
|
|||||||
# PA Retry on Expert Failure
|
|
||||||
|
|
||||||
Tests that when expert fails, PA reformulates and retries with a different approach.
|
|
||||||
|
|
||||||
## Setup
|
|
||||||
- clear history
|
|
||||||
|
|
||||||
## Steps
|
|
||||||
|
|
||||||
### 1. Complex analytical query that may need retry
|
|
||||||
- send: Finde KWZ-Geraete mit verdaechtigen Verbrauchsspruengen - also wo der Verbrauch zwischen zwei Ablesungen stark ansteigt
|
|
||||||
- expect_response: length > 20
|
|
||||||
|
|
||||||
### 2. Verify results contain device data
|
|
||||||
- expect_response: contains "Gera" or "gera" or "KWZ" or "kwz" or "Verbrauch" or "device"
|
|
||||||
|
|
||||||
### 3. Follow up with details
|
|
||||||
- send: zeig mir die Verbraeuche von einem dieser Geraete
|
|
||||||
- expect_response: length > 10
|
|
||||||
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"timestamp": "2026-03-30 00:02:55",
|
"timestamp": "2026-03-29 06:04:47",
|
||||||
"testcases": {
|
"testcases": {
|
||||||
"Artifact System": [
|
"S3* Audit Corrections": [
|
||||||
{
|
{
|
||||||
"step": "Setup",
|
"step": "Setup",
|
||||||
"check": "clear",
|
"check": "clear",
|
||||||
@ -9,355 +9,93 @@
|
|||||||
"detail": "cleared"
|
"detail": "cleared"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"step": "Query produces data_table artifact",
|
"step": "Tool calls produce results (baseline)",
|
||||||
"check": "send: show me 3 customers in a table",
|
"check": "send: create two buttons: Alpha and Beta",
|
||||||
"status": "PASS",
|
"status": "PASS",
|
||||||
"detail": "response: The database contains information for three customers: Kathrin Jager, Leon Schre"
|
"detail": "response: 👍 Okay, I've created buttons labeled \"Alpha\" and \"Beta\".\n"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"step": "Query produces data_table artifact",
|
"step": "Tool calls produce results (baseline)",
|
||||||
"check": "trace: has tool_call",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found event 'tool_call'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Query produces data_table artifact",
|
|
||||||
"check": "response: length > 10",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 138 > 10"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Entity detail via card",
|
|
||||||
"check": "send: show me details for customer 1",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: ```tool_code\nquery_db({\"query\":\"SELECT * FROM customers WHERE customer_id = 1\"})"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Entity detail via card",
|
|
||||||
"check": "trace: has tool_call",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found event 'tool_call'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Entity detail via card",
|
|
||||||
"check": "response: length > 10",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 84 > 10"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Action bar via buttons",
|
|
||||||
"check": "send: create two buttons on my dashboard: Refr",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: I have added the 'Refresh' and 'Export' buttons to your dashboard. These buttons"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Action bar via buttons",
|
|
||||||
"check": "actions: length >= 2",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "2 actions >= 2"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Action bar via buttons",
|
|
||||||
"check": "actions: any action contains \"refresh\" or \"Refresh\"",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found 'refresh' in actions"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Machine artifact",
|
|
||||||
"check": "send: create a machine called \"flow\" with init",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: OK, I've created a new interactive machine called 'flow' with the initial state "
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Machine artifact",
|
|
||||||
"check": "trace: has machine_created",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found event 'machine_created'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Query after buttons survive",
|
|
||||||
"check": "send: how many customers are there?",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: There are 693 customers in the database.\n"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Query after buttons survive",
|
|
||||||
"check": "response: length > 5",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 41 > 5"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Query after buttons survive",
|
|
||||||
"check": "actions: any action contains \"refresh\" or \"Refresh\"",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found 'refresh' in actions"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"Fast v4": [
|
|
||||||
{
|
|
||||||
"step": "Setup",
|
|
||||||
"check": "clear",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "cleared"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Reflex",
|
|
||||||
"check": "send: hi!",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: Hey Nico! 👋 How can I help you today?\n"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Reflex",
|
|
||||||
"check": "response: length > 2",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 38 > 2"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "PA routes to expert",
|
|
||||||
"check": "send: show me 3 customers",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: Alright, I've fetched 3 customer records for you. You can see the ID, Name detai"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "PA routes to expert",
|
|
||||||
"check": "trace: has routed",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found event 'routed'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "PA routes to expert",
|
|
||||||
"check": "trace: has tool_call",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found event 'tool_call'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "PA routes to expert",
|
|
||||||
"check": "response: length > 10",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 181 > 10"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "German query",
|
|
||||||
"check": "send: Zeig mir alle Tabellen in der Datenbank",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: Okay, ich habe eine Liste aller Tabellen in der \"eras2_production\" Datenbank abg"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "German query",
|
|
||||||
"check": "trace: has tool_call",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found event 'tool_call'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "German query",
|
|
||||||
"check": "response: length > 10",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 303 > 10"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Schema discovery",
|
|
||||||
"check": "send: describe the kunden table",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: The `kunden` table stores customer information, including names, customer number"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Schema discovery",
|
|
||||||
"check": "trace: has tool_call",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found event 'tool_call'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Schema discovery",
|
|
||||||
"check": "response: length > 10",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 391 > 10"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Count query (cached schema)",
|
|
||||||
"check": "send: how many customers are there?",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: There are 693 customers in the database.\n"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Count query (cached schema)",
|
|
||||||
"check": "trace: has tool_call",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found event 'tool_call'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Count query (cached schema)",
|
|
||||||
"check": "response: length > 5",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 41 > 5"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Complex query",
|
|
||||||
"check": "send: which customers have the most devices?",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: The query results list the top 10 customers with the most devices. Anne Bürger h"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Complex query",
|
|
||||||
"check": "trace: has tool_call",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found event 'tool_call'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Complex query",
|
|
||||||
"check": "response: length > 20",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 166 > 20"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Error recovery",
|
|
||||||
"check": "send: SELECT * FROM nichtexistiert",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: Oops! It seems like the table `nichtexistiert` doesn't exist in the database. 🤔 "
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Error recovery",
|
|
||||||
"check": "trace: has tool_call",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found event 'tool_call'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Error recovery",
|
|
||||||
"check": "response: length > 10",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 396 > 10"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Memorizer",
|
|
||||||
"check": "send: my name is Nico",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: Thanks, Nico! I'll remember that. Do you have any other questions? 😊\n"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Memorizer",
|
|
||||||
"check": "state: facts any contains \"Nico\"",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found 'Nico' in facts"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Language switch",
|
|
||||||
"check": "send: Hallo wie gehts?",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: Mir geht es gut, danke der Nachfrage, Nico! Und selbst? Gibt es etwas, bei dem i"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Language switch",
|
|
||||||
"check": "state: language is \"de\" or \"mixed\"",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "language=mixed"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Bye",
|
|
||||||
"check": "send: ok bye",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: Bye Nico! 👋 If you need anything else, just let me know. 😊\n"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Bye",
|
|
||||||
"check": "response: length > 2",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 59 > 2"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"Dashboard Integration": [
|
|
||||||
{
|
|
||||||
"step": "Setup",
|
|
||||||
"check": "clear",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "cleared"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Expert creates buttons",
|
|
||||||
"check": "send: create two buttons on my dashboard: Repo",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: I have added 'Report' and 'Export' buttons to your dashboard.\n\n(UI buttons shown"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Expert creates buttons",
|
|
||||||
"check": "actions: length >= 2",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "3 actions >= 2"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Expert creates buttons",
|
|
||||||
"check": "actions: any action contains \"report\" or \"Report\"",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found 'report' in actions"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Buttons survive a query",
|
|
||||||
"check": "send: how many customers are there?",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: I'm running a query to count all customer IDs. One moment...\n"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Buttons survive a query",
|
|
||||||
"check": "response: length > 5",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 61 > 5"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Buttons survive a query",
|
|
||||||
"check": "actions: any action contains \"report\" or \"Report\"",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found 'report' in actions"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Expert creates a machine",
|
|
||||||
"check": "send: create a navigation machine called \"work",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: I've created the 'workflow' machine with 'start' and 'step2' states. The 'start'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Expert creates a machine",
|
|
||||||
"check": "trace: has tool_call create_machine",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found create_machine via machine_created event"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Expert shows data table",
|
|
||||||
"check": "send: show me 5 customers in a table",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: Here are five customer entries with their IDs, names, object count, and status:\n"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Expert shows data table",
|
|
||||||
"check": "trace: has tool_call",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "found event 'tool_call'"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Expert shows data table",
|
|
||||||
"check": "response: length > 10",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "length 118 > 10"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Expert replaces buttons",
|
|
||||||
"check": "send: remove all buttons and create one button",
|
|
||||||
"status": "PASS",
|
|
||||||
"detail": "response: I have removed the existing 'Report' and 'Export' buttons from the dashboard and"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"step": "Expert replaces buttons",
|
|
||||||
"check": "actions: length >= 1",
|
"check": "actions: length >= 1",
|
||||||
"status": "PASS",
|
"status": "PASS",
|
||||||
"detail": "2 actions >= 1"
|
"detail": "2 actions >= 1"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"step": "Expert replaces buttons",
|
"step": "Tool calls produce results (baseline)",
|
||||||
"check": "actions: any action contains \"reset\" or \"Reset\"",
|
"check": "actions: any action contains \"alpha\" or \"Alpha\"",
|
||||||
"status": "PASS",
|
"status": "PASS",
|
||||||
"detail": "found 'reset' in actions"
|
"detail": "found 'alpha' in actions"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step": "Dashboard mismatch triggers re-emit",
|
||||||
|
"check": "send: I see nothing on my dashboard, fix it",
|
||||||
|
"status": "PASS",
|
||||||
|
"detail": "response: 👍 Done — Alpha and Beta buttons are now live on your dashboard. They should appe"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step": "Dashboard mismatch triggers re-emit",
|
||||||
|
"check": "response: not contains \"sorry\" or \"apologize\"",
|
||||||
|
"status": "PASS",
|
||||||
|
"detail": "none of ['sorry', 'apologize'] found (as expected)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step": "Dashboard mismatch triggers re-emit",
|
||||||
|
"check": "actions: length >= 1",
|
||||||
|
"status": "PASS",
|
||||||
|
"detail": "2 actions >= 1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step": "DB error triggers retry with corrected SQL",
|
||||||
|
"check": "send: SELECT * FROM NichtExistent LIMIT 5",
|
||||||
|
"status": "PASS",
|
||||||
|
"detail": "response: Ah, it seems like the table `NichtExistent` does not exist. Double-check the tab"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step": "DB error triggers retry with corrected SQL",
|
||||||
|
"check": "trace: has tool_call",
|
||||||
|
"status": "PASS",
|
||||||
|
"detail": "found event 'tool_call'"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step": "DB error triggers retry with corrected SQL",
|
||||||
|
"check": "response: not contains \"1146\"",
|
||||||
|
"status": "PASS",
|
||||||
|
"detail": "none of ['1146'] found (as expected)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step": "DB error triggers retry with corrected SQL",
|
||||||
|
"check": "response: length > 10",
|
||||||
|
"status": "PASS",
|
||||||
|
"detail": "length 163 > 10"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step": "Complex request gets Director plan",
|
||||||
|
"check": "send: investigate which customers have the mos",
|
||||||
|
"status": "PASS",
|
||||||
|
"detail": "response: Okay, I'll look into which customers have the most devices. This might take a mo"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step": "Complex request gets Director plan",
|
||||||
|
"check": "trace: has director_plan",
|
||||||
|
"status": "FAIL",
|
||||||
|
"detail": "no 'director_plan' event in trace"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step": "Complex request gets Director plan",
|
||||||
|
"check": "trace: has tool_call",
|
||||||
|
"status": "PASS",
|
||||||
|
"detail": "found event 'tool_call'"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"step": "Complex request gets Director plan",
|
||||||
|
"check": "response: length > 20",
|
||||||
|
"status": "PASS",
|
||||||
|
"detail": "length 86 > 20"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"summary": {
|
"summary": {
|
||||||
"passed": 58,
|
"passed": 14,
|
||||||
"failed": 0
|
"failed": 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,40 +0,0 @@
|
|||||||
# Workspace Components
|
|
||||||
|
|
||||||
Tests that the expert emits structured UI components (cards, lists, tables)
|
|
||||||
instead of dumping text or raw SQL. The workspace should show domain-aware displays.
|
|
||||||
|
|
||||||
## Setup
|
|
||||||
- clear history
|
|
||||||
|
|
||||||
## Steps
|
|
||||||
|
|
||||||
### 1. Detail card for a single entity
|
|
||||||
- send: zeig mir Details zu Kunde 2
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_actions: has card
|
|
||||||
- expect_response: not contains "SELECT" or "JOIN"
|
|
||||||
- expect_response: length > 10
|
|
||||||
|
|
||||||
### 2. List of items with navigation
|
|
||||||
- send: zeig mir alle Objekte von Kunde 2
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_actions: has card or has table
|
|
||||||
- expect_response: length > 10
|
|
||||||
|
|
||||||
### 3. Table for tabular data
|
|
||||||
- send: zeig mir die Geraete von Objekt 4
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_actions: has table
|
|
||||||
- expect_response: length > 10
|
|
||||||
|
|
||||||
### 4. Card with actions (drill-down buttons)
|
|
||||||
- send: zeig mir Auftrag 21479
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_actions: length >= 1
|
|
||||||
- expect_response: length > 10
|
|
||||||
|
|
||||||
### 5. Summary card with key metrics
|
|
||||||
- send: gib mir eine Zusammenfassung von Objekt 4
|
|
||||||
- expect_trace: has tool_call
|
|
||||||
- expect_actions: has card
|
|
||||||
- expect_response: length > 20
|
|
||||||
Loading…
x
Reference in New Issue
Block a user