- testcases/*.md: declarative test definitions (send, expect_response, expect_state, expect_actions, action) - runtime_test.py: standalone runner + pytest integration via conftest.py - /tests route: web UI showing last run results from results.json - /api/tests: serves results JSON - Two initial testcases: counter_state (UI actions) and pub_conversation (multi-turn, language switch, tool use, memorizer state) - pub_conversation: 19/20 passed on first run - Fix nm-text vertical overflow in node metrics bar Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
131 lines
4.2 KiB
JSON
131 lines
4.2 KiB
JSON
{
|
|
"timestamp": "2026-03-28 15:34:02",
|
|
"testcases": {
|
|
"Pub Conversation": [
|
|
{
|
|
"step": "Setup",
|
|
"check": "clear",
|
|
"status": "PASS",
|
|
"detail": "cleared"
|
|
},
|
|
{
|
|
"step": "Set the scene",
|
|
"check": "send: Hey, Tina and I are heading to the pub t",
|
|
"status": "PASS",
|
|
"detail": "response: Das ist toll! Was trinkt ihr beide heute Abend?\n"
|
|
},
|
|
{
|
|
"step": "Set the scene",
|
|
"check": "response: length > 10",
|
|
"status": "PASS",
|
|
"detail": "length 48 > 10"
|
|
},
|
|
{
|
|
"step": "Set the scene",
|
|
"check": "state: situation contains \"pub\" or \"Tina\"",
|
|
"status": "PASS",
|
|
"detail": "situation=at a pub with tina, authenticated on https://cog.l contains 'pub'"
|
|
},
|
|
{
|
|
"step": "Language switch to German",
|
|
"check": "send: Wir sind jetzt im Biergarten angekommen",
|
|
"status": "PASS",
|
|
"detail": "response: Super, genießt euer Biergarten-Erlebnis! Und was ist mit Tina? Trinkt sie auch e"
|
|
},
|
|
{
|
|
"step": "Language switch to German",
|
|
"check": "response: length > 10",
|
|
"status": "PASS",
|
|
"detail": "length 95 > 10"
|
|
},
|
|
{
|
|
"step": "Language switch to German",
|
|
"check": "state: language is \"de\" or \"mixed\"",
|
|
"status": "PASS",
|
|
"detail": "language=mixed"
|
|
},
|
|
{
|
|
"step": "Context awareness",
|
|
"check": "send: Was sollen wir bestellen?",
|
|
"status": "PASS",
|
|
"detail": "response: Kommt drauf an, worauf ihr Lust habt! Im Biergarten sind Klassiker wie **Helles*"
|
|
},
|
|
{
|
|
"step": "Context awareness",
|
|
"check": "response: length > 10",
|
|
"status": "PASS",
|
|
"detail": "length 255 > 10"
|
|
},
|
|
{
|
|
"step": "Context awareness",
|
|
"check": "state: topic contains \"bestell\" or \"order\" or \"pub\" or \"Biergarten\"",
|
|
"status": "PASS",
|
|
"detail": "topic=ordering drinks contains 'order'"
|
|
},
|
|
{
|
|
"step": "Tina speaks",
|
|
"check": "send: Tina says: I'll have a Hefeweizen please",
|
|
"status": "PASS",
|
|
"detail": "response: Ah, Tina bleibt ihren Vorlieben treu! Eine gute Wahl. Und für dich, Nico? Suchst"
|
|
},
|
|
{
|
|
"step": "Tina speaks",
|
|
"check": "response: length > 10",
|
|
"status": "PASS",
|
|
"detail": "length 148 > 10"
|
|
},
|
|
{
|
|
"step": "Tina speaks",
|
|
"check": "state: facts any contains \"Tina\" or \"Hefeweizen\"",
|
|
"status": "PASS",
|
|
"detail": "found 'Tina' in facts"
|
|
},
|
|
{
|
|
"step": "Ask for time (tool use)",
|
|
"check": "send: wie spaet ist es eigentlich?",
|
|
"status": "PASS",
|
|
"detail": "response: Du hast mich noch gar nicht danach gefragt. Es ist kurz vor halb 4. Also perfekt"
|
|
},
|
|
{
|
|
"step": "Ask for time (tool use)",
|
|
"check": "response: matches \\d{1,2}:\\d{2}",
|
|
"status": "FAIL",
|
|
"detail": "/\\d{1,2}:\\d{2}/ not found in: Du hast mich noch gar nicht danach gefragt. Es ist kurz vor halb 4. Also perfekt, um den Feierabend "
|
|
},
|
|
{
|
|
"step": "Back to English",
|
|
"check": "send: Let's switch to English, what was the la",
|
|
"status": "PASS",
|
|
"detail": "response: Okay, switching to English! 😉 The last thing Tina said was: \"I'll have a Hefewei"
|
|
},
|
|
{
|
|
"step": "Back to English",
|
|
"check": "state: language is \"en\" or \"mixed\"",
|
|
"status": "PASS",
|
|
"detail": "language=mixed"
|
|
},
|
|
{
|
|
"step": "Back to English",
|
|
"check": "response: contains \"Tina\" or \"Hefeweizen\"",
|
|
"status": "PASS",
|
|
"detail": "found 'Tina'"
|
|
},
|
|
{
|
|
"step": "Mood check",
|
|
"check": "send: This is really fun!",
|
|
"status": "PASS",
|
|
"detail": "response: Indeed! Glad you're having fun. It's always a pleasure chatting with you, Nico. "
|
|
},
|
|
{
|
|
"step": "Mood check",
|
|
"check": "state: user_mood is \"happy\" or \"playful\" or \"excited\"",
|
|
"status": "PASS",
|
|
"detail": "user_mood=happy"
|
|
}
|
|
]
|
|
},
|
|
"summary": {
|
|
"passed": 19,
|
|
"failed": 1
|
|
}
|
|
} |