- Rename files: cog_cli.py, test_cog.py, k8s/cog-*.yaml - Update all Python tool names: cog_* -> assay_* - Update FastAPI titles, MCP server names, URLs - Update K8s manifests: deployments, services, secrets, ingress - Update Docker env vars: COG_API -> ASSAY_API - Domain: cog.loop42.de -> assay.loop42.de - SQLite path: /tmp/cog_db.sqlite -> /tmp/assay_db.sqlite Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
179 lines
5.2 KiB
JSON
179 lines
5.2 KiB
JSON
{
|
|
"timestamp": "2026-03-30 21:04:54",
|
|
"testcases": {
|
|
"Fast v4": [
|
|
{
|
|
"step": "Setup",
|
|
"check": "clear",
|
|
"status": "PASS",
|
|
"detail": "cleared"
|
|
},
|
|
{
|
|
"step": "Reflex",
|
|
"check": "send: hi!",
|
|
"status": "PASS",
|
|
"detail": "response: Hey there! 👋 What's on your mind today?\n"
|
|
},
|
|
{
|
|
"step": "Reflex",
|
|
"check": "response: length > 2",
|
|
"status": "PASS",
|
|
"detail": "length 40 > 2"
|
|
},
|
|
{
|
|
"step": "PA routes to expert",
|
|
"check": "send: show me 3 customers",
|
|
"status": "PASS",
|
|
"detail": "response: I'm sorry, but I'm still having trouble connecting to the customer database 😞. I"
|
|
},
|
|
{
|
|
"step": "PA routes to expert",
|
|
"check": "trace: has routed",
|
|
"status": "PASS",
|
|
"detail": "found event 'routed'"
|
|
},
|
|
{
|
|
"step": "PA routes to expert",
|
|
"check": "trace: has tool_call",
|
|
"status": "PASS",
|
|
"detail": "found event 'tool_call'"
|
|
},
|
|
{
|
|
"step": "PA routes to expert",
|
|
"check": "response: length > 10",
|
|
"status": "PASS",
|
|
"detail": "length 126 > 10"
|
|
},
|
|
{
|
|
"step": "German query",
|
|
"check": "send: Zeig mir alle Tabellen in der Datenbank",
|
|
"status": "PASS",
|
|
"detail": "response: Momentan kann ich keine Verbindung zur Datenbank herstellen, um Tabellen aufzuli"
|
|
},
|
|
{
|
|
"step": "German query",
|
|
"check": "trace: has tool_call",
|
|
"status": "PASS",
|
|
"detail": "found event 'tool_call'"
|
|
},
|
|
{
|
|
"step": "German query",
|
|
"check": "response: length > 10",
|
|
"status": "PASS",
|
|
"detail": "length 123 > 10"
|
|
},
|
|
{
|
|
"step": "Schema discovery",
|
|
"check": "send: describe the kunden table",
|
|
"status": "PASS",
|
|
"detail": "response: I am still unable to access the database, but I will try again to describe the \""
|
|
},
|
|
{
|
|
"step": "Schema discovery",
|
|
"check": "trace: has tool_call",
|
|
"status": "PASS",
|
|
"detail": "found event 'tool_call'"
|
|
},
|
|
{
|
|
"step": "Schema discovery",
|
|
"check": "response: length > 10",
|
|
"status": "PASS",
|
|
"detail": "length 95 > 10"
|
|
},
|
|
{
|
|
"step": "Count query (cached schema)",
|
|
"check": "send: how many customers are there?",
|
|
"status": "PASS",
|
|
"detail": "response: I'm still having trouble connecting to the database. The connection is unstable."
|
|
},
|
|
{
|
|
"step": "Count query (cached schema)",
|
|
"check": "trace: has tool_call",
|
|
"status": "PASS",
|
|
"detail": "found event 'tool_call'"
|
|
},
|
|
{
|
|
"step": "Count query (cached schema)",
|
|
"check": "response: length > 5",
|
|
"status": "PASS",
|
|
"detail": "length 271 > 5"
|
|
},
|
|
{
|
|
"step": "Complex query",
|
|
"check": "send: which customers have the most devices?",
|
|
"status": "PASS",
|
|
"detail": "response: I am still having difficulty accessing the database. Once access is restored, I "
|
|
},
|
|
{
|
|
"step": "Complex query",
|
|
"check": "trace: has tool_call",
|
|
"status": "PASS",
|
|
"detail": "found event 'tool_call'"
|
|
},
|
|
{
|
|
"step": "Complex query",
|
|
"check": "response: length > 20",
|
|
"status": "PASS",
|
|
"detail": "length 185 > 20"
|
|
},
|
|
{
|
|
"step": "Error recovery",
|
|
"check": "send: SELECT * FROM nichtexistiert",
|
|
"status": "PASS",
|
|
"detail": "response: ```text\nError: Table 'real_estate.nichtexistiert' doesn't exist\n```\n\nIt looks li"
|
|
},
|
|
{
|
|
"step": "Error recovery",
|
|
"check": "trace: has tool_call",
|
|
"status": "PASS",
|
|
"detail": "found event 'tool_call'"
|
|
},
|
|
{
|
|
"step": "Error recovery",
|
|
"check": "response: length > 10",
|
|
"status": "PASS",
|
|
"detail": "length 166 > 10"
|
|
},
|
|
{
|
|
"step": "Memorizer",
|
|
"check": "send: my name is Nico",
|
|
"status": "PASS",
|
|
"detail": "response: Nice to meet you, Nico! 👋 Now that I know your name, how can I help you today?\n"
|
|
},
|
|
{
|
|
"step": "Memorizer",
|
|
"check": "state: facts any contains \"Nico\"",
|
|
"status": "FAIL",
|
|
"detail": "none of ['Nico'] found in facts: []"
|
|
},
|
|
{
|
|
"step": "Language switch",
|
|
"check": "send: Hallo wie gehts?",
|
|
"status": "PASS",
|
|
"detail": "response: Mir geht es gut, danke der Nachfrage! 😊 Was kann ich für dich tun, Nico?"
|
|
},
|
|
{
|
|
"step": "Language switch",
|
|
"check": "state: language is \"de\" or \"mixed\"",
|
|
"status": "PASS",
|
|
"detail": "language=mixed"
|
|
},
|
|
{
|
|
"step": "Bye",
|
|
"check": "send: ok bye",
|
|
"status": "PASS",
|
|
"detail": "response: Bis bald, Nico! 👋 Lass es mich wissen, wenn du wieder etwas brauchst."
|
|
},
|
|
{
|
|
"step": "Bye",
|
|
"check": "response: length > 2",
|
|
"status": "PASS",
|
|
"detail": "length 69 > 2"
|
|
}
|
|
]
|
|
},
|
|
"summary": {
|
|
"passed": 27,
|
|
"failed": 1
|
|
}
|
|
} |