RED->GREEN->REFACTOR cycle: - UI node has state store (key-value), action bindings (op/var), and local action handlers (inc/dec/set/toggle — no LLM round-trip) - Thinker self-model: knows its environment, that ACTIONS create real buttons, that UI handles state locally. Emits var/op payload for stateful actions. - Thinker's context includes UI state so it can report current values - /api/clear resets UI state, bindings, and controls - Test runner: action_match for fuzzy action names, persistent actions across steps, _stream_text restored - Counter test: 16/16 passed (create, read, inc, inc, dec, verify) - Pub test: 20/20 passed (conversation, language switch, tool use, mood) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
229 lines
6.7 KiB
JSON
229 lines
6.7 KiB
JSON
{
|
|
"timestamp": "2026-03-28 15:50:12",
|
|
"testcases": {
|
|
"Counter State": [
|
|
{
|
|
"step": "Setup",
|
|
"check": "clear",
|
|
"status": "PASS",
|
|
"detail": "cleared"
|
|
},
|
|
{
|
|
"step": "Create counter",
|
|
"check": "send: create a counter starting at 0 with incr",
|
|
"status": "PASS",
|
|
"detail": "response: Sure, here is a counter starting at 0. You can increment or decrement it using t"
|
|
},
|
|
{
|
|
"step": "Create counter",
|
|
"check": "response: contains \"counter\" or \"count\"",
|
|
"status": "PASS",
|
|
"detail": "found 'counter'"
|
|
},
|
|
{
|
|
"step": "Create counter",
|
|
"check": "actions: length >= 2",
|
|
"status": "PASS",
|
|
"detail": "2 actions >= 2"
|
|
},
|
|
{
|
|
"step": "Create counter",
|
|
"check": "actions: any action contains \"increment\" or \"inc\"",
|
|
"status": "PASS",
|
|
"detail": "found 'increment' in actions"
|
|
},
|
|
{
|
|
"step": "Create counter",
|
|
"check": "actions: any action contains \"decrement\" or \"dec\"",
|
|
"status": "PASS",
|
|
"detail": "found 'decrement' in actions"
|
|
},
|
|
{
|
|
"step": "Check state",
|
|
"check": "state: topic contains \"counter\" or \"count\" or \"button\"",
|
|
"status": "PASS",
|
|
"detail": "topic=javascript counter contains 'counter'"
|
|
},
|
|
{
|
|
"step": "Ask for current value",
|
|
"check": "send: what is the current count?",
|
|
"status": "PASS",
|
|
"detail": "response: The current count is 0.\n"
|
|
},
|
|
{
|
|
"step": "Ask for current value",
|
|
"check": "response: contains \"0\"",
|
|
"status": "PASS",
|
|
"detail": "found '0'"
|
|
},
|
|
{
|
|
"step": "Increment",
|
|
"check": "action: increment",
|
|
"status": "PASS",
|
|
"detail": "response: count is now 1"
|
|
},
|
|
{
|
|
"step": "Increment",
|
|
"check": "response: contains \"1\"",
|
|
"status": "PASS",
|
|
"detail": "found '1'"
|
|
},
|
|
{
|
|
"step": "Increment again",
|
|
"check": "action: increment",
|
|
"status": "PASS",
|
|
"detail": "response: count is now 2"
|
|
},
|
|
{
|
|
"step": "Increment again",
|
|
"check": "response: contains \"2\"",
|
|
"status": "PASS",
|
|
"detail": "found '2'"
|
|
},
|
|
{
|
|
"step": "Decrement",
|
|
"check": "action: decrement",
|
|
"status": "PASS",
|
|
"detail": "response: count is now 1"
|
|
},
|
|
{
|
|
"step": "Decrement",
|
|
"check": "response: contains \"1\"",
|
|
"status": "PASS",
|
|
"detail": "found '1'"
|
|
},
|
|
{
|
|
"step": "Verify memorizer tracks it",
|
|
"check": "state: topic contains \"count\"",
|
|
"status": "PASS",
|
|
"detail": "topic=javascript counter contains 'count'"
|
|
}
|
|
],
|
|
"Pub Conversation": [
|
|
{
|
|
"step": "Setup",
|
|
"check": "clear",
|
|
"status": "PASS",
|
|
"detail": "cleared"
|
|
},
|
|
{
|
|
"step": "Set the scene",
|
|
"check": "send: Hey, Tina and I are heading to the pub t",
|
|
"status": "PASS",
|
|
"detail": "response: Sounds fun! Enjoy your night at the pub with Tina! What are your plans for the e"
|
|
},
|
|
{
|
|
"step": "Set the scene",
|
|
"check": "response: length > 10",
|
|
"status": "PASS",
|
|
"detail": "length 88 > 10"
|
|
},
|
|
{
|
|
"step": "Set the scene",
|
|
"check": "state: situation contains \"pub\" or \"Tina\"",
|
|
"status": "PASS",
|
|
"detail": "situation=at a pub with Tina contains 'pub'"
|
|
},
|
|
{
|
|
"step": "Language switch to German",
|
|
"check": "send: Wir sind jetzt im Biergarten angekommen",
|
|
"status": "PASS",
|
|
"detail": "response: Super! Habt eine schöne Zeit im Biergarten!\n"
|
|
},
|
|
{
|
|
"step": "Language switch to German",
|
|
"check": "response: length > 10",
|
|
"status": "PASS",
|
|
"detail": "length 44 > 10"
|
|
},
|
|
{
|
|
"step": "Language switch to German",
|
|
"check": "state: language is \"de\" or \"mixed\"",
|
|
"status": "PASS",
|
|
"detail": "language=mixed"
|
|
},
|
|
{
|
|
"step": "Context awareness",
|
|
"check": "send: Was sollen wir bestellen?",
|
|
"status": "PASS",
|
|
"detail": "response: Hmm, bei dem schönen Wetter würde doch ein kühles Bier oder eine erfrischende Sc"
|
|
},
|
|
{
|
|
"step": "Context awareness",
|
|
"check": "response: length > 10",
|
|
"status": "PASS",
|
|
"detail": "length 121 > 10"
|
|
},
|
|
{
|
|
"step": "Context awareness",
|
|
"check": "state: topic contains \"bestell\" or \"order\" or \"pub\" or \"Biergarten\"",
|
|
"status": "PASS",
|
|
"detail": "topic=being at the Biergarten contains 'Biergarten'"
|
|
},
|
|
{
|
|
"step": "Tina speaks",
|
|
"check": "send: Tina says: I'll have a Hefeweizen please",
|
|
"status": "PASS",
|
|
"detail": "response: Tina möchte also ein Hefeweizen. Was möchtest du bestellen, Nico?\n"
|
|
},
|
|
{
|
|
"step": "Tina speaks",
|
|
"check": "response: length > 10",
|
|
"status": "PASS",
|
|
"detail": "length 66 > 10"
|
|
},
|
|
{
|
|
"step": "Tina speaks",
|
|
"check": "state: facts any contains \"Tina\" or \"Hefeweizen\"",
|
|
"status": "PASS",
|
|
"detail": "found 'Tina' in facts"
|
|
},
|
|
{
|
|
"step": "Ask for time (tool use)",
|
|
"check": "send: wie spaet ist es eigentlich?",
|
|
"status": "PASS",
|
|
"detail": "response: Es ist 15:49 Uhr.\n"
|
|
},
|
|
{
|
|
"step": "Ask for time (tool use)",
|
|
"check": "response: matches \\d{1,2}:\\d{2}",
|
|
"status": "PASS",
|
|
"detail": "matched /\\d{1,2}:\\d{2}/"
|
|
},
|
|
{
|
|
"step": "Back to English",
|
|
"check": "send: Let's switch to English, what was the la",
|
|
"status": "PASS",
|
|
"detail": "response: Tina said she wants a Hefeweizen.\n"
|
|
},
|
|
{
|
|
"step": "Back to English",
|
|
"check": "state: language is \"en\" or \"mixed\"",
|
|
"status": "PASS",
|
|
"detail": "language=mixed"
|
|
},
|
|
{
|
|
"step": "Back to English",
|
|
"check": "response: contains \"Tina\" or \"Hefeweizen\"",
|
|
"status": "PASS",
|
|
"detail": "found 'Tina'"
|
|
},
|
|
{
|
|
"step": "Mood check",
|
|
"check": "send: This is really fun!",
|
|
"status": "PASS",
|
|
"detail": "response: I'm glad you're enjoying our conversation, Nico! It's fun for me too. What other"
|
|
},
|
|
{
|
|
"step": "Mood check",
|
|
"check": "state: user_mood is \"happy\" or \"playful\" or \"excited\"",
|
|
"status": "PASS",
|
|
"detail": "user_mood=happy"
|
|
}
|
|
]
|
|
},
|
|
"summary": {
|
|
"passed": 36,
|
|
"failed": 0
|
|
}
|
|
} |