Rename cog -> assay across codebase

- Rename files: cog_cli.py, test_cog.py, k8s/cog-*.yaml
- Update all Python tool names: cog_* -> assay_*
- Update FastAPI titles, MCP server names, URLs
- Update K8s manifests: deployments, services, secrets, ingress
- Update Docker env vars: COG_API -> ASSAY_API
- Domain: cog.loop42.de -> assay.loop42.de
- SQLite path: /tmp/cog_db.sqlite -> /tmp/assay_db.sqlite

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Nico 2026-03-31 01:39:05 +02:00
parent 925fff731f
commit bf11312b4b
17 changed files with 139 additions and 303 deletions

13
Dockerfile.test Normal file
View File

@ -0,0 +1,13 @@
# Test-runner image: bundles runtime_test.py with the testcases/ directory.
FROM python:3.12-slim
WORKDIR /app
# Third-party packages installed for the runner: httpx and websockets.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir httpx websockets
COPY runtime_test.py .
COPY testcases/ testcases/
# Default API base URL, pointing at a host named "runtime" on port 8000.
# NOTE(review): presumably "runtime" is the compose service name and this
# variable is read by runtime_test.py — confirm against both files.
ENV ASSAY_API=http://runtime:8000/api
# ENTRYPOINT fixes the command; CMD only supplies the default testcase path,
# so a different testcase can be passed as the container argument.
ENTRYPOINT ["python3", "runtime_test.py"]
CMD ["testcases/fast_v4.md"]

View File

@ -1,4 +1,4 @@
"""Cognitive Agent Runtime — modular package.
"""assay — agent runtime.
uvicorn entrypoint: agent:app
"""
@ -30,13 +30,13 @@ class NoCacheStaticMiddleware(BaseHTTPMiddleware):
STATIC_DIR = Path(__file__).parent.parent / "static"
app = FastAPI(title="cog")
app = FastAPI(title="assay")
# Register all API + WS routes
register_routes(app)
# Serve frontend from same process (fallback for non-split deploy)
# When running behind cog-frontend nginx, these paths won't be hit
# When running behind assay-frontend nginx, these paths won't be hit
@app.get("/")
async def index():
resp = FileResponse(STATIC_DIR / "index.html")

View File

@ -1,10 +1,11 @@
"""Shared database access for Thinker and Expert nodes."""
import logging
import os
log = logging.getLogger("runtime")
DB_HOST = "mariadb-eras"
DB_HOST = os.environ.get("DB_HOST", "mariadb-eras")
DB_USER = "root"
DB_PASS = "root"
ALLOWED_DATABASES = ("eras2_production", "plankiste_test")

View File

@ -1,4 +1,4 @@
"""Standalone MCP SSE app — proxies tool calls to cog-runtime."""
"""Standalone MCP SSE app — proxies tool calls to assay-runtime."""
import json
import logging
@ -20,11 +20,11 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message
log = logging.getLogger("mcp-proxy")
# Config
RUNTIME_URL = os.environ.get("RUNTIME_URL", "http://cog-runtime")
RUNTIME_URL = os.environ.get("RUNTIME_URL", "http://assay-runtime")
SERVICE_TOKENS = set(filter(None, os.environ.get("SERVICE_TOKENS", "").split(",")))
SERVICE_TOKEN = os.environ.get("SERVICE_TOKENS", "").split(",")[0] if os.environ.get("SERVICE_TOKENS") else ""
app = FastAPI(title="cog-mcp")
app = FastAPI(title="assay-mcp")
_security = HTTPBearer()
@ -42,7 +42,7 @@ async def health():
# --- MCP Server ---
mcp_server = Server("cog")
mcp_server = Server("assay")
_mcp_transport = SseServerTransport("/mcp/messages/")
@ -87,29 +87,29 @@ async def _proxy_post(path: str, body: dict = None) -> dict:
@mcp_server.list_tools()
async def list_tools():
return [
Tool(name="cog_send", description="Send a message to the cognitive agent and get a response.",
Tool(name="assay_send", description="Send a message to the cognitive agent and get a response.",
inputSchema={"type": "object", "properties": {
"text": {"type": "string", "description": "Message text to send"},
"database": {"type": "string", "description": "Optional: database name for query_db context"},
}, "required": ["text"]}),
Tool(name="cog_trace", description="Get recent trace events from the pipeline (HUD events, tool calls, audit).",
Tool(name="assay_trace", description="Get recent trace events from the pipeline (HUD events, tool calls, audit).",
inputSchema={"type": "object", "properties": {
"last": {"type": "integer", "description": "Number of recent events (default 20)", "default": 20},
"filter": {"type": "string", "description": "Comma-separated event types to filter (e.g. 'tool_call,controls')"},
}}),
Tool(name="cog_history", description="Get recent chat messages from the active session.",
Tool(name="assay_history", description="Get recent chat messages from the active session.",
inputSchema={"type": "object", "properties": {
"last": {"type": "integer", "description": "Number of recent messages (default 20)", "default": 20},
}}),
Tool(name="cog_state", description="Get the current memorizer state (mood, topic, language, facts).",
Tool(name="assay_state", description="Get the current memorizer state (mood, topic, language, facts).",
inputSchema={"type": "object", "properties": {}}),
Tool(name="cog_clear", description="Clear the active session (history, state, controls).",
Tool(name="assay_clear", description="Clear the active session (history, state, controls).",
inputSchema={"type": "object", "properties": {}}),
Tool(name="cog_graph", description="Get the active graph definition (nodes, edges, description).",
Tool(name="assay_graph", description="Get the active graph definition (nodes, edges, description).",
inputSchema={"type": "object", "properties": {}}),
Tool(name="cog_graph_list", description="List all available graph definitions.",
Tool(name="assay_graph_list", description="List all available graph definitions.",
inputSchema={"type": "object", "properties": {}}),
Tool(name="cog_graph_switch", description="Switch the active graph for new sessions.",
Tool(name="assay_graph_switch", description="Switch the active graph for new sessions.",
inputSchema={"type": "object", "properties": {
"name": {"type": "string", "description": "Graph name to switch to"},
}, "required": ["name"]}),
@ -118,7 +118,7 @@ async def list_tools():
@mcp_server.call_tool()
async def call_tool(name: str, arguments: dict):
if name == "cog_send":
if name == "assay_send":
text = arguments.get("text", "")
if not text:
return [TextContent(type="text", text="ERROR: Missing 'text' argument.")]
@ -150,7 +150,7 @@ async def call_tool(name: str, arguments: dict):
return [TextContent(type="text", text=f"ERROR: {result.get('detail', 'pipeline failed')}")]
return [TextContent(type="text", text="ERROR: Pipeline timeout (30s)")]
elif name == "cog_trace":
elif name == "assay_trace":
last = arguments.get("last", 20)
event_filter = arguments.get("filter", "")
params = {"last": last}
@ -170,38 +170,38 @@ async def call_tool(name: str, arguments: dict):
lines.append(line.rstrip())
return [TextContent(type="text", text="\n".join(lines) if lines else "(no events)")]
elif name == "cog_history":
elif name == "assay_history":
last = arguments.get("last", 20)
result = await _proxy_get("/api/history", {"last": last})
if "error" in result:
return [TextContent(type="text", text=f"ERROR: {result['error']}")]
return [TextContent(type="text", text=json.dumps(result.get("messages", []), indent=2))]
elif name == "cog_state":
elif name == "assay_state":
result = await _proxy_get("/api/state")
if "error" in result:
return [TextContent(type="text", text=f"ERROR: {result['error']}")]
return [TextContent(type="text", text=json.dumps(result, indent=2))]
elif name == "cog_clear":
elif name == "assay_clear":
result = await _proxy_post("/api/clear")
if "error" in result:
return [TextContent(type="text", text=f"ERROR: {result['error']}")]
return [TextContent(type="text", text="Session cleared.")]
elif name == "cog_graph":
elif name == "assay_graph":
result = await _proxy_get("/api/graph/active")
if "error" in result:
return [TextContent(type="text", text=f"ERROR: {result['error']}")]
return [TextContent(type="text", text=json.dumps(result, indent=2))]
elif name == "cog_graph_list":
elif name == "assay_graph_list":
result = await _proxy_get("/api/graph/list")
if "error" in result:
return [TextContent(type="text", text=f"ERROR: {result['error']}")]
return [TextContent(type="text", text=json.dumps(result.get("graphs", []), indent=2))]
elif name == "cog_graph_switch":
elif name == "assay_graph_switch":
gname = arguments.get("name", "")
if not gname:
return [TextContent(type="text", text="ERROR: Missing 'name' argument.")]

View File

@ -1,4 +1,4 @@
"""MCP server for cog — exposes runtime tools to any MCP client."""
"""MCP server for assay — exposes runtime tools to any MCP client."""
import json
import logging
@ -12,7 +12,7 @@ log = logging.getLogger("mcp")
TRACE_FILE = Path(__file__).parent.parent / "trace.jsonl"
server = Server("cog")
server = Server("assay")
# Reference to active runtime — set by api.py when WS connects
_get_runtime = lambda: None
@ -26,29 +26,29 @@ def set_runtime_getter(fn):
@server.list_tools()
async def list_tools():
return [
Tool(name="cog_send", description="Send a message to the cognitive agent and get a response.",
Tool(name="assay_send", description="Send a message to the cognitive agent and get a response.",
inputSchema={"type": "object", "properties": {
"text": {"type": "string", "description": "Message text to send"},
"database": {"type": "string", "description": "Optional: database name for query_db context"},
}, "required": ["text"]}),
Tool(name="cog_trace", description="Get recent trace events from the pipeline (HUD events, tool calls, audit).",
Tool(name="assay_trace", description="Get recent trace events from the pipeline (HUD events, tool calls, audit).",
inputSchema={"type": "object", "properties": {
"last": {"type": "integer", "description": "Number of recent events (default 20)", "default": 20},
"filter": {"type": "string", "description": "Comma-separated event types to filter (e.g. 'tool_call,controls')"},
}}),
Tool(name="cog_history", description="Get recent chat messages from the active session.",
Tool(name="assay_history", description="Get recent chat messages from the active session.",
inputSchema={"type": "object", "properties": {
"last": {"type": "integer", "description": "Number of recent messages (default 20)", "default": 20},
}}),
Tool(name="cog_state", description="Get the current memorizer state (mood, topic, language, facts).",
Tool(name="assay_state", description="Get the current memorizer state (mood, topic, language, facts).",
inputSchema={"type": "object", "properties": {}}),
Tool(name="cog_clear", description="Clear the active session (history, state, controls).",
Tool(name="assay_clear", description="Clear the active session (history, state, controls).",
inputSchema={"type": "object", "properties": {}}),
Tool(name="cog_graph", description="Get the active graph definition (nodes, edges, description).",
Tool(name="assay_graph", description="Get the active graph definition (nodes, edges, description).",
inputSchema={"type": "object", "properties": {}}),
Tool(name="cog_graph_list", description="List all available graph definitions.",
Tool(name="assay_graph_list", description="List all available graph definitions.",
inputSchema={"type": "object", "properties": {}}),
Tool(name="cog_graph_switch", description="Switch the active graph for new sessions.",
Tool(name="assay_graph_switch", description="Switch the active graph for new sessions.",
inputSchema={"type": "object", "properties": {
"name": {"type": "string", "description": "Graph name to switch to"},
}, "required": ["name"]}),
@ -59,7 +59,7 @@ async def list_tools():
async def call_tool(name: str, arguments: dict):
runtime = _get_runtime()
if name == "cog_send":
if name == "assay_send":
if not runtime:
return [TextContent(type="text", text="ERROR: No active session — someone must be connected via WebSocket first.")]
text = arguments.get("text", "").strip()
@ -69,7 +69,7 @@ async def call_tool(name: str, arguments: dict):
response = runtime.history[-1]["content"] if runtime.history else "(no response)"
return [TextContent(type="text", text=response)]
elif name == "cog_trace":
elif name == "assay_trace":
last = arguments.get("last", 20)
filt = arguments.get("filter", "").split(",") if arguments.get("filter") else None
if not TRACE_FILE.exists():
@ -107,7 +107,7 @@ async def call_tool(name: str, arguments: dict):
out.append(f"{node:12} {event:20} {str(detail)[:120]}")
return [TextContent(type="text", text="\n".join(out) if out else "(no matching events)")]
elif name == "cog_history":
elif name == "assay_history":
if not runtime:
return [TextContent(type="text", text="(no active session)")]
last = arguments.get("last", 20)
@ -119,12 +119,12 @@ async def call_tool(name: str, arguments: dict):
out.append("")
return [TextContent(type="text", text="\n".join(out) if out else "(no messages)")]
elif name == "cog_state":
elif name == "assay_state":
if not runtime:
return [TextContent(type="text", text="(no active session)")]
return [TextContent(type="text", text=json.dumps(runtime.memorizer.state, indent=2, ensure_ascii=False))]
elif name == "cog_clear":
elif name == "assay_clear":
if not runtime:
return [TextContent(type="text", text="ERROR: No active session.")]
runtime.history.clear()
@ -134,7 +134,7 @@ async def call_tool(name: str, arguments: dict):
runtime.ui_node.machines.clear()
return [TextContent(type="text", text="Session cleared.")]
elif name == "cog_graph":
elif name == "assay_graph":
from .engine import load_graph, get_graph_for_cytoscape
from .runtime import _active_graph_name
graph = load_graph(_active_graph_name)
@ -147,11 +147,11 @@ async def call_tool(name: str, arguments: dict):
"audit": graph.get("audit", {}),
}, indent=2))]
elif name == "cog_graph_list":
elif name == "assay_graph_list":
from .engine import list_graphs
return [TextContent(type="text", text=json.dumps(list_graphs(), indent=2))]
elif name == "cog_graph_switch":
elif name == "assay_graph_switch":
from .engine import load_graph
import agent.runtime as rt
gname = arguments.get("name", "")

View File

@ -340,7 +340,7 @@ CRITICAL RULES:
if code and len(code.split("\n")) > 0:
if lang in ("sql", "sqlite"):
wrapped = f'''import sqlite3
conn = sqlite3.connect("/tmp/cog_db.sqlite")
conn = sqlite3.connect("/tmp/assay_db.sqlite")
cursor = conn.cursor()
for stmt in """{code}""".split(";"):
stmt = stmt.strip()

View File

@ -1,10 +1,10 @@
"""CLI helper for reading cog API — trace, history, state, send."""
"""CLI helper for reading assay API — trace, history, state, send."""
import json
import sys
import httpx
API = "https://cog.loop42.de"
API = "https://assay.loop42.de"
TOKEN = "7Oorb9S3OpwFyWgm4zi_Tq7GeamefbjjTgooPVPWAwPDOf6B4TvgvQlLbhmT4DjsqBS_D1g"
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}
@ -107,7 +107,7 @@ def graph():
if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: cog_cli.py <command> [args]")
print("Usage: assay_cli.py <command> [args]")
print(" trace [last] [event_filter] — show trace events")
print(" history [last] — show chat history")
print(" state — show memorizer state")

View File

@ -3,10 +3,16 @@ services:
build: .
ports:
- "8000:8000"
volumes:
- ./agent.py:/app/agent.py
- ./static:/app/static
- ./trace.jsonl:/app/trace.jsonl
env_file:
- .env
restart: unless-stopped
tester:
build:
context: .
dockerfile: Dockerfile.test
environment:
- ASSAY_API=http://runtime:8000/api
depends_on:
- runtime
profiles: ["test"]

View File

@ -1,13 +1,13 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: cog-frontend
name: assay-frontend
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: cog-frontend
app: assay-frontend
strategy:
type: RollingUpdate
rollingUpdate:
@ -16,11 +16,11 @@ spec:
template:
metadata:
labels:
app: cog-frontend
app: assay-frontend
spec:
containers:
- name: nginx
image: docker.io/library/cog-frontend:latest
image: docker.io/library/assay-frontend:latest
imagePullPolicy: Never
ports:
- containerPort: 80
@ -47,11 +47,11 @@ spec:
apiVersion: v1
kind: Service
metadata:
name: cog-frontend
name: assay-frontend
namespace: default
spec:
selector:
app: cog-frontend
app: assay-frontend
ports:
- port: 80
targetPort: 80

View File

@ -1,7 +1,7 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: cog-runtime
name: assay-runtime
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
traefik.ingress.kubernetes.io/router.entrypoints: websecure
@ -9,11 +9,11 @@ metadata:
spec:
ingressClassName: traefik
tls:
- secretName: cog-tls
- secretName: assay-tls
hosts:
- cog.loop42.de
- assay.loop42.de
rules:
- host: cog.loop42.de
- host: assay.loop42.de
http:
paths:
# MCP SSE — separate pod, survives runtime restarts
@ -21,7 +21,7 @@ spec:
pathType: Prefix
backend:
service:
name: cog-mcp
name: assay-mcp
port:
number: 80
# WebSocket + REST API — runtime pod
@ -29,28 +29,28 @@ spec:
pathType: Prefix
backend:
service:
name: cog-runtime
name: assay-runtime
port:
number: 80
- path: /api
pathType: Prefix
backend:
service:
name: cog-runtime
name: assay-runtime
port:
number: 80
- path: /health
pathType: Prefix
backend:
service:
name: cog-runtime
name: assay-runtime
port:
number: 80
- path: /auth
pathType: Prefix
backend:
service:
name: cog-runtime
name: assay-runtime
port:
number: 80
# Frontend — nginx, catch-all (must be last)
@ -58,6 +58,6 @@ spec:
pathType: Prefix
backend:
service:
name: cog-frontend
name: assay-frontend
port:
number: 80

View File

@ -1,13 +1,13 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: cog-mcp
name: assay-mcp
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: cog-mcp
app: assay-mcp
strategy:
type: RollingUpdate
rollingUpdate:
@ -16,11 +16,11 @@ spec:
template:
metadata:
labels:
app: cog-mcp
app: assay-mcp
spec:
containers:
- name: mcp
image: docker.io/library/loop42-agent:v0.13.0
image: docker.io/library/loop42-agent:v0.18.0
imagePullPolicy: Never
command: ["uvicorn", "agent.mcp_app:app", "--host", "0.0.0.0", "--port", "8001"]
ports:
@ -29,10 +29,10 @@ spec:
- name: SERVICE_TOKENS
value: 7Oorb9S3OpwFyWgm4zi_Tq7GeamefbjjTgooPVPWAwPDOf6B4TvgvQlLbhmT4DjsqBS_D1g
- name: RUNTIME_URL
value: "http://cog-runtime"
value: "http://assay-runtime"
envFrom:
- secretRef:
name: cog-runtime-env
name: assay-runtime-env
readinessProbe:
httpGet:
path: /health
@ -56,11 +56,11 @@ spec:
apiVersion: v1
kind: Service
metadata:
name: cog-mcp
name: assay-mcp
namespace: default
spec:
selector:
app: cog-mcp
app: assay-mcp
ports:
- port: 80
targetPort: 8001

View File

@ -1,13 +1,13 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: cog-runtime
name: assay-runtime
namespace: default
spec:
replicas: 1
selector:
matchLabels:
app: cog-runtime
app: assay-runtime
strategy:
type: RollingUpdate
rollingUpdate:
@ -16,11 +16,11 @@ spec:
template:
metadata:
labels:
app: cog-runtime
app: assay-runtime
spec:
containers:
- name: agent
image: docker.io/library/loop42-agent:v0.13.0
image: docker.io/library/loop42-agent:v0.18.0
imagePullPolicy: Never
ports:
- containerPort: 8000
@ -31,7 +31,7 @@ spec:
value: 7Oorb9S3OpwFyWgm4zi_Tq7GeamefbjjTgooPVPWAwPDOf6B4TvgvQlLbhmT4DjsqBS_D1g
envFrom:
- secretRef:
name: cog-runtime-env
name: assay-runtime-env
readinessProbe:
httpGet:
path: /health
@ -55,11 +55,11 @@ spec:
apiVersion: v1
kind: Service
metadata:
name: cog-runtime
name: assay-runtime
namespace: default
spec:
selector:
app: cog-runtime
app: assay-runtime
ports:
- port: 80
targetPort: 8000

View File

@ -1,5 +1,5 @@
"""
Cog Runtime Test Runner parses testcases/*.md and executes against live API.
Assay Runtime Test Runner parses testcases/*.md and executes against live API.
Usage:
pytest testcases/ -v # run all testcases
@ -21,7 +21,7 @@ if sys.platform == "win32":
from pathlib import Path
from dataclasses import dataclass, field
API = "https://cog.loop42.de/api"
API = os.environ.get("ASSAY_API", "https://assay.loop42.de/api")
TOKEN = "7Oorb9S3OpwFyWgm4zi_Tq7GeamefbjjTgooPVPWAwPDOf6B4TvgvQlLbhmT4DjsqBS_D1g"
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}
@ -136,7 +136,7 @@ def _parse_command(text: str) -> dict | None:
# --- API client ---
class CogClient:
class AssayClient:
def __init__(self):
self.client = httpx.Client(timeout=90)
self.last_response = ""
@ -430,9 +430,9 @@ class StepResult:
detail: str = ""
class CogTestRunner:
class AssayTestRunner:
def __init__(self, on_result=None):
self.client = CogClient()
self.client = AssayClient()
self._on_result = on_result # callback(result_dict) per check
def run(self, testcase: dict) -> list[dict]:
@ -568,7 +568,7 @@ def run_standalone(paths: list[Path] = None):
print(f" {r['detail']}")
_push_status("step_result", suite=suite_name, result=r)
runner = CogTestRunner(on_result=_on_result)
runner = AssayTestRunner(on_result=_on_result)
results = runner.run(tc)
all_results[tc["name"]] = results

View File

@ -1,7 +1,7 @@
"""Test script for cog runtime API. Run with: .venv/Scripts/python.exe test_cog.py"""
"""Test script for assay runtime API. Run with: .venv/Scripts/python.exe test_assay.py"""
import httpx, sys, time
API = "https://cog.loop42.de/api"
API = "https://assay.loop42.de/api"
TOKEN = "7Oorb9S3OpwFyWgm4zi_Tq7GeamefbjjTgooPVPWAwPDOf6B4TvgvQlLbhmT4DjsqBS_D1g"
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}
@ -21,12 +21,12 @@ tests = [
]
clear()
print("=== COG TEST RUN ===\n")
print("=== ASSAY TEST RUN ===\n")
for i, (msg, _) in enumerate(tests, 1):
print(f"--- {i}. USER: {msg}")
resp, memo = send(msg)
print(f" COG: {resp}")
print(f" ASSAY: {resp}")
print(f" MEMO: name={memo.get('user_name')} mood={memo.get('user_mood')} topic={memo.get('topic')}")
print()
time.sleep(0.5)

View File

@ -2,7 +2,7 @@
Usage:
python run_all.py # local only
python run_all.py --report https://cog.loop42.de # + push to frontend
python run_all.py --report https://assay.loop42.de # + push to frontend
"""
import sys

View File

@ -1,9 +1,9 @@
"""Integration tests — send real messages through the live runtime pipeline.
Usage:
python run_integration.py --url https://cog.loop42.de --graph v1-current
python run_integration.py --url https://cog.loop42.de --graph v2-director-drives
python run_integration.py --url https://cog.loop42.de --graph both
python run_integration.py --url https://assay.loop42.de --graph v1-current
python run_integration.py --url https://assay.loop42.de --graph v2-director-drives
python run_integration.py --url https://assay.loop42.de --graph both
"""
import json
@ -30,7 +30,7 @@ from harness import NodeTestRunner
# --- Config ---
DEFAULT_URL = "https://cog.loop42.de"
DEFAULT_URL = "https://assay.loop42.de"
TOKEN = "7Oorb9S3OpwFyWgm4zi_Tq7GeamefbjjTgooPVPWAwPDOf6B4TvgvQlLbhmT4DjsqBS_D1g"
HEADERS = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}

View File

@ -1,98 +1,6 @@
{
"timestamp": "2026-03-30 00:02:55",
"timestamp": "2026-03-30 21:04:54",
"testcases": {
"Artifact System": [
{
"step": "Setup",
"check": "clear",
"status": "PASS",
"detail": "cleared"
},
{
"step": "Query produces data_table artifact",
"check": "send: show me 3 customers in a table",
"status": "PASS",
"detail": "response: The database contains information for three customers: Kathrin Jager, Leon Schre"
},
{
"step": "Query produces data_table artifact",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Query produces data_table artifact",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 138 > 10"
},
{
"step": "Entity detail via card",
"check": "send: show me details for customer 1",
"status": "PASS",
"detail": "response: ```tool_code\nquery_db({\"query\":\"SELECT * FROM customers WHERE customer_id = 1\"})"
},
{
"step": "Entity detail via card",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Entity detail via card",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 84 > 10"
},
{
"step": "Action bar via buttons",
"check": "send: create two buttons on my dashboard: Refr",
"status": "PASS",
"detail": "response: I have added the 'Refresh' and 'Export' buttons to your dashboard. These buttons"
},
{
"step": "Action bar via buttons",
"check": "actions: length >= 2",
"status": "PASS",
"detail": "2 actions >= 2"
},
{
"step": "Action bar via buttons",
"check": "actions: any action contains \"refresh\" or \"Refresh\"",
"status": "PASS",
"detail": "found 'refresh' in actions"
},
{
"step": "Machine artifact",
"check": "send: create a machine called \"flow\" with init",
"status": "PASS",
"detail": "response: OK, I've created a new interactive machine called 'flow' with the initial state "
},
{
"step": "Machine artifact",
"check": "trace: has machine_created",
"status": "PASS",
"detail": "found event 'machine_created'"
},
{
"step": "Query after buttons survive",
"check": "send: how many customers are there?",
"status": "PASS",
"detail": "response: There are 693 customers in the database.\n"
},
{
"step": "Query after buttons survive",
"check": "response: length > 5",
"status": "PASS",
"detail": "length 41 > 5"
},
{
"step": "Query after buttons survive",
"check": "actions: any action contains \"refresh\" or \"Refresh\"",
"status": "PASS",
"detail": "found 'refresh' in actions"
}
],
"Fast v4": [
{
"step": "Setup",
@ -104,19 +12,19 @@
"step": "Reflex",
"check": "send: hi!",
"status": "PASS",
"detail": "response: Hey Nico! 👋 How can I help you today?\n"
"detail": "response: Hey there! 👋 What's on your mind today?\n"
},
{
"step": "Reflex",
"check": "response: length > 2",
"status": "PASS",
"detail": "length 38 > 2"
"detail": "length 40 > 2"
},
{
"step": "PA routes to expert",
"check": "send: show me 3 customers",
"status": "PASS",
"detail": "response: Alright, I've fetched 3 customer records for you. You can see the ID, Name detai"
"detail": "response: I'm sorry, but I'm still having trouble connecting to the customer database 😞. I"
},
{
"step": "PA routes to expert",
@ -134,13 +42,13 @@
"step": "PA routes to expert",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 181 > 10"
"detail": "length 126 > 10"
},
{
"step": "German query",
"check": "send: Zeig mir alle Tabellen in der Datenbank",
"status": "PASS",
"detail": "response: Okay, ich habe eine Liste aller Tabellen in der \"eras2_production\" Datenbank abg"
"detail": "response: Momentan kann ich keine Verbindung zur Datenbank herstellen, um Tabellen aufzuli"
},
{
"step": "German query",
@ -152,13 +60,13 @@
"step": "German query",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 303 > 10"
"detail": "length 123 > 10"
},
{
"step": "Schema discovery",
"check": "send: describe the kunden table",
"status": "PASS",
"detail": "response: The `kunden` table stores customer information, including names, customer number"
"detail": "response: I am still unable to access the database, but I will try again to describe the \""
},
{
"step": "Schema discovery",
@ -170,13 +78,13 @@
"step": "Schema discovery",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 391 > 10"
"detail": "length 95 > 10"
},
{
"step": "Count query (cached schema)",
"check": "send: how many customers are there?",
"status": "PASS",
"detail": "response: There are 693 customers in the database.\n"
"detail": "response: I'm still having trouble connecting to the database. The connection is unstable."
},
{
"step": "Count query (cached schema)",
@ -188,13 +96,13 @@
"step": "Count query (cached schema)",
"check": "response: length > 5",
"status": "PASS",
"detail": "length 41 > 5"
"detail": "length 271 > 5"
},
{
"step": "Complex query",
"check": "send: which customers have the most devices?",
"status": "PASS",
"detail": "response: The query results list the top 10 customers with the most devices. Anne Bürger h"
"detail": "response: I am still having difficulty accessing the database. Once access is restored, I "
},
{
"step": "Complex query",
@ -206,13 +114,13 @@
"step": "Complex query",
"check": "response: length > 20",
"status": "PASS",
"detail": "length 166 > 20"
"detail": "length 185 > 20"
},
{
"step": "Error recovery",
"check": "send: SELECT * FROM nichtexistiert",
"status": "PASS",
"detail": "response: Oops! It seems like the table `nichtexistiert` doesn't exist in the database. 🤔 "
"detail": "response: ```text\nError: Table 'real_estate.nichtexistiert' doesn't exist\n```\n\nIt looks li"
},
{
"step": "Error recovery",
@ -224,25 +132,25 @@
"step": "Error recovery",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 396 > 10"
"detail": "length 166 > 10"
},
{
"step": "Memorizer",
"check": "send: my name is Nico",
"status": "PASS",
"detail": "response: Thanks, Nico! I'll remember that. Do you have any other questions? 😊\n"
"detail": "response: Nice to meet you, Nico! 👋 Now that I know your name, how can I help you today?\n"
},
{
"step": "Memorizer",
"check": "state: facts any contains \"Nico\"",
"status": "PASS",
"detail": "found 'Nico' in facts"
"status": "FAIL",
"detail": "none of ['Nico'] found in facts: []"
},
{
"step": "Language switch",
"check": "send: Hallo wie gehts?",
"status": "PASS",
"detail": "response: Mir geht es gut, danke der Nachfrage, Nico! Und selbst? Gibt es etwas, bei dem i"
"detail": "response: Mir geht es gut, danke der Nachfrage! 😊 Was kann ich für dich tun, Nico?"
},
{
"step": "Language switch",
@ -254,110 +162,18 @@
"step": "Bye",
"check": "send: ok bye",
"status": "PASS",
"detail": "response: Bye Nico! 👋 If you need anything else, just let me know. 😊\n"
"detail": "response: Bis bald, Nico! 👋 Lass es mich wissen, wenn du wieder etwas brauchst."
},
{
"step": "Bye",
"check": "response: length > 2",
"status": "PASS",
"detail": "length 59 > 2"
}
],
"Dashboard Integration": [
{
"step": "Setup",
"check": "clear",
"status": "PASS",
"detail": "cleared"
},
{
"step": "Expert creates buttons",
"check": "send: create two buttons on my dashboard: Repo",
"status": "PASS",
"detail": "response: I have added 'Report' and 'Export' buttons to your dashboard.\n\n(UI buttons shown"
},
{
"step": "Expert creates buttons",
"check": "actions: length >= 2",
"status": "PASS",
"detail": "3 actions >= 2"
},
{
"step": "Expert creates buttons",
"check": "actions: any action contains \"report\" or \"Report\"",
"status": "PASS",
"detail": "found 'report' in actions"
},
{
"step": "Buttons survive a query",
"check": "send: how many customers are there?",
"status": "PASS",
"detail": "response: I'm running a query to count all customer IDs. One moment...\n"
},
{
"step": "Buttons survive a query",
"check": "response: length > 5",
"status": "PASS",
"detail": "length 61 > 5"
},
{
"step": "Buttons survive a query",
"check": "actions: any action contains \"report\" or \"Report\"",
"status": "PASS",
"detail": "found 'report' in actions"
},
{
"step": "Expert creates a machine",
"check": "send: create a navigation machine called \"work",
"status": "PASS",
"detail": "response: I've created the 'workflow' machine with 'start' and 'step2' states. The 'start'"
},
{
"step": "Expert creates a machine",
"check": "trace: has tool_call create_machine",
"status": "PASS",
"detail": "found create_machine via machine_created event"
},
{
"step": "Expert shows data table",
"check": "send: show me 5 customers in a table",
"status": "PASS",
"detail": "response: Here are five customer entries with their IDs, names, object count, and status:\n"
},
{
"step": "Expert shows data table",
"check": "trace: has tool_call",
"status": "PASS",
"detail": "found event 'tool_call'"
},
{
"step": "Expert shows data table",
"check": "response: length > 10",
"status": "PASS",
"detail": "length 118 > 10"
},
{
"step": "Expert replaces buttons",
"check": "send: remove all buttons and create one button",
"status": "PASS",
"detail": "response: I have removed the existing 'Report' and 'Export' buttons from the dashboard and"
},
{
"step": "Expert replaces buttons",
"check": "actions: length >= 1",
"status": "PASS",
"detail": "2 actions >= 1"
},
{
"step": "Expert replaces buttons",
"check": "actions: any action contains \"reset\" or \"Reset\"",
"status": "PASS",
"detail": "found 'reset' in actions"
"detail": "length 69 > 2"
}
]
},
"summary": {
"passed": 58,
"failed": 0
"passed": 27,
"failed": 1
}
}