A 5e storytelling engine with an LLM DM
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 631 lines 23 kB view raw
"""Tests for the FastMCP server composition.

The orchestrator in storied.mcp_server builds a per-role top-level FastMCP
server by mounting the tools/*.py module-level FastMCP instances and
applying tag-based visibility filters. These tests verify the per-role
tool visibility plus the dynamic combat-tag flip when initiative starts
and ends.
"""

import asyncio
from contextlib import closing
from unittest.mock import MagicMock, call

import pytest

import storied.tools.combat as combat_mod
from storied.mcp_server import ALL_ROLES, _compose_server, _populate_index
from storied.tools import ToolContext
from storied.tools.character import refresh_advancement_visibility
from storied.tools.combat import _flip_into_combat, _flip_out_of_combat
from storied.tools.combat import mcp as combat_mcp


def _names(role: str) -> set[str]:
    """Compose the server for ``role`` and return the names of its listed tools."""

    async def _gather() -> set[str]:
        server = await _compose_server(role)
        return {t.name for t in await server.list_tools()}

    return asyncio.run(_gather())


class TestPerRoleComposition:
    """Each role sees only the tools tagged for it."""

    def test_dm_includes_core_narrative_tools(self):
        """The DM compose lists the core narrative tools."""
        names = _names("dm")
        assert "set_scene" in names
        assert "establish" in names
        assert "mark" in names
        assert "end_session" in names
        assert "recall" in names
        assert "roll" in names
        assert "run_code" in names

    def test_dm_includes_character_tools(self):
        """The DM compose lists every character-sheet tool."""
        names = _names("dm")
        for tool_name in (
            "damage",
            "heal",
            "adjust_coins",
            "add_effect",
            "remove_effect",
            "add_condition",
            "remove_condition",
            "add_item",
            "remove_item",
            "set_item_status",
            "adjust_resource",
            "rest",
            "add_note",
            "update_character",
            "create_character",
        ):
            assert tool_name in names, f"missing {tool_name}"

    def test_dm_does_not_include_removed_tools(self):
        """break_concentration, use_resource, and restore_resource were
        folded into other tools — ensure they don't resurface."""
        names = _names("dm")
        assert "break_concentration" not in names
        assert "use_resource" not in names
        assert "restore_resource" not in names

    def test_dm_initial_excludes_combat_tools(self):
        """In DM mode, combat tools are hidden until enter_initiative runs."""
        names = _names("dm")
        assert "next_turn" not in names
        assert "add_combatant" not in names
        assert "remove_combatant" not in names
        assert "condition" not in names

    def test_dm_initial_keeps_combat_control(self):
        """enter_initiative and end_initiative stay visible so combat can begin."""
        names = _names("dm")
        assert "enter_initiative" in names
        assert "end_initiative" in names

    def test_planner_only_has_its_tools(self):
        """The planner compose lists exactly this tool set."""
        assert _names("planner") == {
            "establish",
            "mark",
            "amend_mark",
            "notify_dm",
            "recall",
            "forge_culture",
            "generate_names",
        }

    def test_seeder_only_has_its_tools(self):
        """The seeder compose lists exactly this tool set."""
        assert _names("seeder") == {
            "establish",
            "set_scene",
            "forge_culture",
            "generate_names",
        }

    def test_advancement_only_has_its_tools(self):
        """The advancement compose lists exactly this tool set."""
        assert _names("advancement") == {"notify_dm", "recall", "update_character"}

    def test_arc_architect_only_has_its_tools(self):
        # The arc architect's whole job is critique-and-commit. Nothing
        # else. World-building tools must not leak into this role.
        assert _names("arc_architect") == {"commit_arc", "recall"}

    def test_arc_architect_in_all_roles(self):
        """arc_architect is registered in ALL_ROLES."""
        assert "arc_architect" in ALL_ROLES


class TestToolSchemas:
    """Verify tool input schemas expose nested field shapes to the LLM.

    These tests guard against regression to bare `dict` / `list` parameter
    types, which leave the LLM with no guidance about which keys are required.
    """

    def _schema(self, tool_name: str) -> dict:
        """Return the ``parameters`` schema of ``tool_name`` from the DM compose.

        Raises AssertionError when the DM server lists no tool by that name.
        """

        async def _gather() -> dict:
            server = await _compose_server("dm")
            for t in await server.list_tools():
                if t.name == tool_name:
                    return t.parameters
            raise AssertionError(f"tool {tool_name!r} not found")

        return asyncio.run(_gather())

    def test_enter_initiative_documents_combatant_shape(self):
        """Each combatant item schema requires the core stat fields."""
        schema = self._schema("enter_initiative")
        item_schema = schema["properties"]["combatants"]["items"]
        required = set(item_schema["required"])
        assert {"name", "initiative", "hp", "hp_max", "ac"} <= required, (
            f"enter_initiative must require all combatant fields, got {required}"
        )
        # is_player is optional but documented
        assert "is_player" in item_schema["properties"]

    def test_create_character_documents_ability_keys(self):
        """The abilities schema names all six ability scores."""
        schema = self._schema("create_character")
        ability_props = schema["properties"]["abilities"]["properties"]
        for ability in (
            "strength",
            "dexterity",
            "constitution",
            "intelligence",
            "wisdom",
            "charisma",
        ):
            assert ability in ability_props, (
                f"create_character must document the {ability} ability score"
            )

    def test_adjust_coins_documents_denominations(self):
        """The deltas schema names every coin denomination."""
        schema = self._schema("adjust_coins")
        delta_props = schema["properties"]["deltas"]["properties"]
        for denom in ("cp", "sp", "ep", "gp", "pp"):
            assert denom in delta_props, (
                f"adjust_coins must document the {denom} denomination"
            )

    def test_create_character_purse_documents_denominations(self):
        """The optional purse schema names every coin denomination."""
        schema = self._schema("create_character")
        purse = schema["properties"]["purse"]
        # Purse is wrapped in anyOf for the | None
        purse_props = next(
            opt for opt in purse["anyOf"] if opt.get("type") == "object"
        )["properties"]
        for denom in ("cp", "sp", "ep", "gp", "pp"):
            assert denom in purse_props

    @pytest.mark.parametrize(
        ("tool_name", "param", "expected_values"),
        [
            ("rest", "type", {"short", "long"}),
            ("set_item_status", "status", {"attuned", "equipped", "carried"}),
            ("recall", "scope", {"rules", "world", "all"}),
            (
                "establish",
                "entity_type",
                {
                    "npcs",
                    "locations",
                    "items",
                    "factions",
                    "threads",
                    "lore",
                    "maps",
                    "cultures",
                },
            ),
            (
                "mark",
                "entity_type",
                {
                    "npcs",
                    "locations",
                    "items",
                    "factions",
                    "threads",
                    "maps",
                    "cultures",
                },
            ),
            (
                "note_discovery",
                "content_type",
                {"npcs", "locations", "factions", "lore", "cultures"},
            ),
        ],
    )
    def test_enum_parameters_expose_valid_values(
        self,
        tool_name: str,
        param: str,
        expected_values: set[str],
    ):
        """Each conceptually-enum parameter must surface as a JSON Schema
        enum, not a free-form string. Guards against regression to bare `str`."""
        schema = self._schema(tool_name)
        prop = schema["properties"][param]
        # Default values wrap the enum in anyOf for `Type | None`; unwrap if needed
        enum_values = prop.get("enum")
        if enum_values is None and "anyOf" in prop:
            for opt in prop["anyOf"]:
                if "enum" in opt:
                    enum_values = opt["enum"]
                    break
        assert enum_values is not None, (
            f"{tool_name}.{param} should expose an enum, got {prop}"
        )
        assert set(enum_values) == expected_values

    def test_combat_condition_enum_parameters(self):
        """The combat `condition` tool is hidden in the default DM compose
        (combat-only), so check it directly on the combat module."""

        async def _gather() -> dict:
            for t in await combat_mcp.list_tools():
                if t.name == "condition":
                    return t.parameters
            raise AssertionError("condition tool not found")

        params = asyncio.run(_gather())
        assert set(params["properties"]["action"]["enum"]) == {"add", "remove"}
        assert set(params["properties"]["ends_on"]["enum"]) == {"start", "end"}


class TestCombatTagFlip:
    """Entering and ending initiative toggles combat-tag visibility on the
    composed top-level server."""

    def test_flip_into_combat_shows_combat_tools(self, ctx: ToolContext):
        """After _flip_into_combat the combat-only tools are listed."""

        async def _run() -> set[str]:
            server = await _compose_server("dm")
            _flip_into_combat()
            return {t.name for t in await server.list_tools()}

        try:
            names = asyncio.run(_run())
            assert "next_turn" in names
            assert "add_combatant" in names
            assert "condition" in names
        finally:
            # Cleanup runs even when an assertion fails, so the flipped
            # combat visibility never leaks into later tests.
            _flip_out_of_combat()

    def test_flip_out_of_combat_hides_combat_tools(self, ctx: ToolContext):
        """Flipping in and straight back out leaves combat tools hidden."""

        async def _run() -> set[str]:
            server = await _compose_server("dm")
            _flip_into_combat()
            _flip_out_of_combat()
            return {t.name for t in await server.list_tools()}

        names = asyncio.run(_run())
        assert "next_turn" not in names
        assert "add_combatant" not in names

    def test_combat_control_stays_visible_through_cycle(self, ctx: ToolContext):
        """enter_initiative / end_initiative are tagged combat_control and
        must stay visible whether initiative is active or not."""

        async def _gather_combat_control() -> tuple[set[str], set[str], set[str]]:
            server = await _compose_server("dm")
            initial = {t.name for t in await server.list_tools()}
            _flip_into_combat()
            try:
                during = {t.name for t in await server.list_tools()}
            finally:
                # Always flip back, even if listing tools raises mid-combat.
                _flip_out_of_combat()
            after = {t.name for t in await server.list_tools()}
            return initial, during, after

        initial, during, after = asyncio.run(_gather_combat_control())
        for state in (initial, during, after):
            assert "enter_initiative" in state
            assert "end_initiative" in state


class TestAdvancementVisibility:
    """level_up is hidden until the character has advancement_ready set."""

    def test_level_up_hidden_at_compose_time(self, ctx: ToolContext):
        """Fresh compose should not expose level_up — nothing has granted it yet."""

        async def _gather() -> set[str]:
            server = await _compose_server("dm")
            return {t.name for t in await server.list_tools()}

        names = asyncio.run(_gather())
        assert "level_up" not in names

    def test_level_up_revealed_when_advancement_ready(self, ctx: ToolContext):
        """Setting advancement_ready on the character reveals level_up."""

        async def _run() -> set[str]:
            server = await _compose_server("dm")
            refresh_advancement_visibility({"advancement_ready": 4})
            return {t.name for t in await server.list_tools()}

        try:
            names = asyncio.run(_run())
            assert "level_up" in names
        finally:
            # Cleanup runs even when the assertion fails, so the revealed
            # level_up tool never leaks into later tests.
            refresh_advancement_visibility(None)

    def test_level_up_hidden_again_when_flag_cleared(self, ctx: ToolContext):
        """Clearing advancement_ready hides level_up again."""

        async def _run() -> set[str]:
            server = await _compose_server("dm")
            refresh_advancement_visibility({"advancement_ready": 4})
            refresh_advancement_visibility({"advancement_ready": None})
            return {t.name for t in await server.list_tools()}

        names = asyncio.run(_run())
        assert "level_up" not in names

    def test_level_up_not_in_planner_compose(self, ctx: ToolContext):
        """Only the DM role cares about advancement visibility. Other roles
        don't have level_up at all, so the flip is a no-op for them."""

        async def _run() -> set[str]:
            server = await _compose_server("planner")
            return {t.name for t in await server.list_tools()}

        names = asyncio.run(_run())
        assert "level_up" not in names

    def test_refresh_with_none_character_is_safe(self, ctx: ToolContext):
        """A character sheet that doesn't exist yet (pre-creation) should
        not crash the visibility flip."""

        async def _run() -> None:
            await _compose_server("dm")
            refresh_advancement_visibility(None)

        asyncio.run(_run())


class TestPopulateIndex:
    """Cover the SRD-seeding helper without launching a real server.

    Tests pass an explicit ``srd_root`` pointed at a tmp path so the
    real package rules directory isn't touched.
    """

    def test_no_srd_no_world_dir(self, tmp_path):
        """With nothing on disk, the index is neither reseeded nor reindexed."""
        vi = MagicMock()
        vi.has_source.return_value = False
        _populate_index(
            tmp_path / "worlds" / "missing",
            vi,
            srd_root=tmp_path / "srd-missing",
        )
        # No SRD seed, no SRD sections, no world dir → nothing should be called
        vi.reseed.assert_not_called()
        vi.reindex_directory.assert_not_called()

    def test_world_dir_only(self, tmp_path):
        """An existing world dir is reindexed as source='world', skipping
        the transcripts subdirectory."""
        world_dir = tmp_path / "worlds" / "test"
        world_dir.mkdir(parents=True)
        vi = MagicMock()
        vi.has_source.return_value = False
        _populate_index(
            world_dir,
            vi,
            srd_root=tmp_path / "srd-missing",
        )
        vi.reindex_directory.assert_called_once_with(
            world_dir,
            source="world",
            skip_subdirs=frozenset({"transcripts"}),
        )

    def test_srd_sections_dir(self, tmp_path):
        """An SRD sections directory (with no seed db) is reindexed as 'srd'."""
        srd_root = tmp_path / "srd-5.2.1"
        srd_dir = srd_root / "sections"
        srd_dir.mkdir(parents=True)
        world_dir = tmp_path / "worlds" / "test"
        vi = MagicMock()
        vi.has_source.return_value = False
        _populate_index(world_dir, vi, srd_root=srd_root)
        # SRD sections present → reindex SRD; no user layer, no world dir
        assert vi.reindex_directory.call_count == 1
        vi.reindex_directory.assert_called_with(srd_dir, source="srd")

    def test_user_layer_indexed(self, tmp_path):
        """When the user homebrew directory exists, _populate_index
        reindexes it with source='user' after the shipped SRD."""
        # The autouse fixture sets _user_rules_home = tmp_path / "rules".
        user_dir = tmp_path / "rules"
        user_dir.mkdir(parents=True)
        (user_dir / "monsters").mkdir()
        (user_dir / "monsters" / "homebrew.md").write_text("# Homebrew")

        world_dir = tmp_path / "worlds" / "test"
        world_dir.mkdir(parents=True)

        vi = MagicMock()
        vi.has_source.return_value = False
        _populate_index(
            world_dir,
            vi,
            srd_root=tmp_path / "srd-missing",
        )
        # Should have indexed user and world, in that order
        assert vi.reindex_directory.call_args_list == [
            call(user_dir, source="user"),
            call(
                world_dir,
                source="world",
                skip_subdirs=frozenset({"transcripts"}),
            ),
        ]

    def test_populate_index_is_idempotent(self, tmp_path):
        """Once the SRD is seeded, subsequent calls must not reseed
        (which would wipe world/transcript rows by file-copying the SRD
        db over the live one)."""
        srd_root = tmp_path / "srd-5.2.1"
        srd_root.mkdir(parents=True)
        srd_seed = srd_root / "search.db"
        srd_seed.write_bytes(b"sqlite stub")
        world_dir = tmp_path / "worlds" / "test"
        world_dir.mkdir(parents=True)

        vi = MagicMock()
        vi.has_source.return_value = True  # SRD already seeded
        _populate_index(world_dir, vi, srd_root=srd_root)
        vi.reseed.assert_not_called()

    # NOTE(review): this exercises the combat flip helpers rather than
    # _populate_index — consider moving it to TestCombatTagFlip.
    def test_flip_helpers_no_op_when_root_unset(self):
        """The combat-tag flip helpers must not crash when no top-level
        server has been registered (e.g. when the combat module is imported
        in isolation by a test before any compose_server call)."""
        # Snapshot and clear _root for the duration of the test
        saved_root = combat_mod._root
        saved_keys = combat_mod._combat_keys_to_hide
        combat_mod._root = None
        combat_mod._combat_keys_to_hide = set()
        try:
            combat_mod._flip_into_combat()  # should silently no-op
            combat_mod._flip_out_of_combat()
        finally:
            combat_mod._root = saved_root
            combat_mod._combat_keys_to_hide = saved_keys

    def test_srd_seed_db_takes_priority(self, tmp_path):
        """A shipped seed db is used via reseed instead of reindexing sections."""
        srd_root = tmp_path / "srd-5.2.1"
        srd_root.mkdir(parents=True)
        srd_seed = srd_root / "search.db"
        srd_seed.write_bytes(b"sqlite stub")
        # Also create the sections dir to verify the seed wins
        (srd_root / "sections").mkdir()
        world_dir = tmp_path / "worlds" / "test"
        vi = MagicMock()
        vi.has_source.return_value = False
        _populate_index(world_dir, vi, srd_root=srd_root)
        vi.reseed.assert_called_once_with(srd_seed)
        # When the seed exists, we don't also reindex SRD sections
        vi.reindex_directory.assert_not_called()


class TestRulesLookupRace:
    """Regression test for the transcript-upsert-vs-first-search race.

    Before the fix, the lazy ``on_empty`` seeding would miss its window
    whenever the DM's first turn didn't call ``recall``: the engine would
    upsert the transcript at turn end, making the db non-empty, and the
    next search would skip seeding because ``count > 0``. Rules lookups
    would then return nothing. The fix eagerly populates in
    ``start_server``, and ``_populate_index`` is idempotent via
    ``has_source("srd")``.
    """

    def test_srd_stays_available_after_transcript_upsert_race(
        self,
        tmp_path,
        monkeypatch,
    ):
        """SRD rows remain searchable after a transcript upsert precedes
        the first search."""
        from storied import paths
        from storied.search import VectorIndex

        # Minimal shipped SRD seed the populate helper can copy from.
        srd_root = tmp_path / "shipped" / "srd-5.2.1"
        srd_root.mkdir(parents=True)
        seed_db = srd_root / "search.db"
        # closing() releases the seed index even if the upsert raises.
        with closing(VectorIndex(seed_db)) as seed_index:
            seed_index.upsert(
                "srd:character-origins.md:0",
                "# Character Origins\n\n**Half-Elf** gets +2 Charisma.",
                {
                    "source": "srd",
                    "content_type": "rules",
                    "path": str(srd_root / "sections" / "character-origins.md"),
                    "title": "Character Origins",
                },
            )

        monkeypatch.setattr(
            paths,
            "shipped_rules_path",
            lambda: tmp_path / "shipped",
        )

        world_dir = paths.world_path("default")
        world_dir.mkdir(parents=True, exist_ok=True)
        db_path = world_dir / "search.db"

        # Eager populate the way start_server now does. closing() makes
        # sure the live index is released whether or not the test passes.
        with closing(VectorIndex(db_path)) as vi:
            _populate_index(world_dir, vi)
            assert vi.has_source("srd")

            # Turn 1 ends without any recall — engine upserts the transcript.
            (world_dir / "transcripts").mkdir(exist_ok=True)
            day_path = world_dir / "transcripts" / "day+001.md"
            day_path.write_text("### Day 1\n\nHey, welcome.")
            vi.upsert(
                "transcript:transcripts/day+001.md:0",
                day_path.read_text(),
                {
                    "source": "transcript",
                    "content_type": "transcripts",
                    "path": str(day_path),
                    "title": "Day 1",
                    "game_day": 1,
                },
            )

            # Turn 2: the DM finally calls recall. SRD must still be there.
            hits = vi.search(
                "half-elf charisma",
                limit=3,
                source_filter=["srd", "user", "world"],
            )
            assert len(hits) >= 1
            assert any(h.source == "srd" for h in hits)

    def test_populate_is_idempotent_on_repeated_start_server(
        self,
        tmp_path,
        monkeypatch,
    ):
        """A second start_server (onboarding → play handoff) must not
        wipe the world/transcript rows by re-copying the SRD seed."""
        from storied import paths
        from storied.search import VectorIndex

        srd_root = tmp_path / "shipped" / "srd-5.2.1"
        srd_root.mkdir(parents=True)
        seed_db = srd_root / "search.db"
        # closing() releases the seed index even if the upsert raises.
        with closing(VectorIndex(seed_db)) as seed_index:
            seed_index.upsert(
                "srd:x.md:0",
                "# X",
                {"source": "srd", "path": "x.md"},
            )

        monkeypatch.setattr(
            paths,
            "shipped_rules_path",
            lambda: tmp_path / "shipped",
        )

        world_dir = paths.world_path("default")
        world_dir.mkdir(parents=True, exist_ok=True)
        db_path = world_dir / "search.db"

        with closing(VectorIndex(db_path)) as vi:
            _populate_index(world_dir, vi)

            # Upsert a transcript row to represent prior session state.
            vi.upsert(
                "transcript:transcripts/day+001.md:0",
                "turn content",
                {"source": "transcript", "path": "transcripts/day+001.md"},
            )
            assert vi.has_source("transcript")

            # Second populate — must not wipe transcripts via reseed.
            _populate_index(world_dir, vi)
            assert vi.has_source("srd")
            assert vi.has_source("transcript")