# A 5e storytelling engine with an LLM DM.
1"""Tests for the FastMCP server composition.
2
3The orchestrator in storied.mcp_server builds a per-role top-level FastMCP
4server by mounting the tools/*.py module-level FastMCP instances and
5applying tag-based visibility filters. These tests verify the per-role
6tool visibility plus the dynamic combat-tag flip when initiative starts
7and ends.
8"""
9
10import asyncio
11
12import pytest
13
14from storied.mcp_server import _compose_server
15from storied.tools import ToolContext
16from storied.tools.character import refresh_advancement_visibility
17from storied.tools.combat import _flip_into_combat, _flip_out_of_combat
18
19
def _names(role: str) -> set[str]:
    """Return the names of all tools listed by a freshly composed ``role`` server.

    The server is composed and queried inside a one-off event loop so the
    synchronous tests can call this helper directly.
    """

    async def _collect_tool_names() -> set[str]:
        composed = await _compose_server(role)
        tools = await composed.list_tools()
        return {tool.name for tool in tools}

    return asyncio.run(_collect_tool_names())
26
27
class TestPerRoleComposition:
    """Each role's composed server lists only the tools tagged for that role."""

    def test_dm_includes_core_narrative_tools(self):
        """The DM compose lists the core narrative tool set."""
        core_tools = {
            "set_scene",
            "establish",
            "mark",
            "end_session",
            "recall",
            "roll",
            "run_code",
        }
        visible = _names("dm")
        assert core_tools <= visible

    def test_dm_includes_character_tools(self):
        """The DM compose lists every character-sheet tool."""
        character_tools = (
            "damage",
            "heal",
            "adjust_coins",
            "add_effect",
            "remove_effect",
            "add_condition",
            "remove_condition",
            "add_item",
            "remove_item",
            "set_item_status",
            "adjust_resource",
            "rest",
            "add_note",
            "update_character",
            "create_character",
        )
        visible = _names("dm")
        for required in character_tools:
            assert required in visible, f"missing {required}"

    def test_dm_does_not_include_removed_tools(self):
        """break_concentration, use_resource, and restore_resource were
        folded into other tools — ensure they don't resurface."""
        retired = {"break_concentration", "use_resource", "restore_resource"}
        visible = _names("dm")
        assert visible.isdisjoint(retired)

    def test_dm_initial_excludes_combat_tools(self):
        """In DM mode, combat tools are hidden until enter_initiative runs."""
        combat_only = {"next_turn", "add_combatant", "remove_combatant", "condition"}
        visible = _names("dm")
        assert visible.isdisjoint(combat_only)

    def test_dm_initial_keeps_combat_control(self):
        """enter_initiative and end_initiative stay visible so combat can begin."""
        visible = _names("dm")
        assert {"enter_initiative", "end_initiative"} <= visible

    def test_planner_only_has_its_tools(self):
        """The planner compose lists exactly its own tool set."""
        expected = {
            "establish",
            "mark",
            "amend_mark",
            "notify_dm",
            "recall",
            "forge_culture",
            "generate_names",
        }
        assert _names("planner") == expected

    def test_seeder_only_has_its_tools(self):
        """The seeder compose lists exactly its own tool set."""
        expected = {
            "establish",
            "set_scene",
            "forge_culture",
            "generate_names",
        }
        assert _names("seeder") == expected

    def test_advancement_only_has_its_tools(self):
        """The advancement compose lists exactly its own tool set."""
        expected = {"notify_dm", "recall", "update_character"}
        assert _names("advancement") == expected

    def test_arc_architect_only_has_its_tools(self):
        """The arc architect compose lists exactly commit_arc and recall."""
        # The arc architect's whole job is critique-and-commit. Nothing
        # else. World-building tools must not leak into this role.
        expected = {"commit_arc", "recall"}
        assert _names("arc_architect") == expected

    def test_arc_architect_in_all_roles(self):
        """ALL_ROLES registers the arc_architect role."""
        from storied.mcp_server import ALL_ROLES

        assert "arc_architect" in ALL_ROLES
115
116
class TestToolSchemas:
    """Verify tool input schemas expose nested field shapes to the LLM.

    These tests guard against regression to bare `dict` / `list` parameter
    types, which leave the LLM with no guidance about which keys are required.
    """

    def _schema(self, tool_name: str) -> dict:
        """Return the parameter schema of ``tool_name`` from a fresh DM compose.

        Raises:
            AssertionError: if the DM server does not list ``tool_name``.
        """

        async def _gather() -> dict:
            server = await _compose_server("dm")
            for t in await server.list_tools():
                if t.name == tool_name:
                    return t.parameters
            raise AssertionError(f"tool {tool_name!r} not found")

        return asyncio.run(_gather())

    def test_enter_initiative_documents_combatant_shape(self):
        """Each combatant item must declare its required fields."""
        schema = self._schema("enter_initiative")
        item_schema = schema["properties"]["combatants"]["items"]
        required = set(item_schema["required"])
        assert {"name", "initiative", "hp", "hp_max", "ac"} <= required, (
            f"enter_initiative must require all combatant fields, got {required}"
        )
        # is_player is optional but documented
        assert "is_player" in item_schema["properties"]

    def test_create_character_documents_ability_keys(self):
        """The abilities object must name all six ability scores."""
        schema = self._schema("create_character")
        ability_props = schema["properties"]["abilities"]["properties"]
        for ability in (
            "strength",
            "dexterity",
            "constitution",
            "intelligence",
            "wisdom",
            "charisma",
        ):
            assert ability in ability_props, (
                f"create_character must document the {ability} ability score"
            )

    def test_adjust_coins_documents_denominations(self):
        """The deltas object must name every coin denomination."""
        schema = self._schema("adjust_coins")
        delta_props = schema["properties"]["deltas"]["properties"]
        for denom in ("cp", "sp", "ep", "gp", "pp"):
            assert denom in delta_props, (
                f"adjust_coins must document the {denom} denomination"
            )

    def test_create_character_purse_documents_denominations(self):
        """The optional purse object must name every coin denomination."""
        schema = self._schema("create_character")
        purse = schema["properties"]["purse"]
        # Purse is wrapped in anyOf for the | None; pick the object branch.
        # A default keeps a schema-shape regression from surfacing as an
        # opaque StopIteration / KeyError instead of a readable failure.
        object_branch = next(
            (opt for opt in purse.get("anyOf", []) if opt.get("type") == "object"),
            None,
        )
        assert object_branch is not None, (
            f"create_character.purse should offer an object option, got {purse}"
        )
        purse_props = object_branch["properties"]
        for denom in ("cp", "sp", "ep", "gp", "pp"):
            assert denom in purse_props, (
                f"create_character purse must document the {denom} denomination"
            )

    @pytest.mark.parametrize(
        ("tool_name", "param", "expected_values"),
        [
            ("rest", "type", {"short", "long"}),
            ("set_item_status", "status", {"attuned", "equipped", "carried"}),
            ("recall", "scope", {"rules", "world", "all"}),
            (
                "establish",
                "entity_type",
                {
                    "npcs",
                    "locations",
                    "items",
                    "factions",
                    "threads",
                    "lore",
                    "maps",
                    "cultures",
                },
            ),
            (
                "mark",
                "entity_type",
                {
                    "npcs",
                    "locations",
                    "items",
                    "factions",
                    "threads",
                    "maps",
                    "cultures",
                },
            ),
            (
                "note_discovery",
                "content_type",
                {"npcs", "locations", "factions", "lore", "cultures"},
            ),
        ],
    )
    def test_enum_parameters_expose_valid_values(
        self,
        tool_name: str,
        param: str,
        expected_values: set[str],
    ):
        """Each conceptually-enum parameter must surface as a JSON Schema
        enum, not a free-form string. Guards against regression to bare `str`."""
        schema = self._schema(tool_name)
        prop = schema["properties"][param]
        # Default values wrap the enum in anyOf for `Type | None`; unwrap if needed
        enum_values = prop.get("enum")
        if enum_values is None and "anyOf" in prop:
            for opt in prop["anyOf"]:
                if "enum" in opt:
                    enum_values = opt["enum"]
                    break
        assert enum_values is not None, (
            f"{tool_name}.{param} should expose an enum, got {prop}"
        )
        assert set(enum_values) == expected_values

    def test_combat_condition_enum_parameters(self):
        """The combat `condition` tool is hidden in the default DM compose
        (combat-only), so check it directly on the combat module."""
        from storied.tools.combat import mcp as combat_mcp

        async def _gather() -> dict:
            for t in await combat_mcp.list_tools():
                if t.name == "condition":
                    return t.parameters
            raise AssertionError("condition tool not found")

        params = asyncio.run(_gather())
        assert set(params["properties"]["action"]["enum"]) == {"add", "remove"}
        assert set(params["properties"]["ends_on"]["enum"]) == {"start", "end"}
253
254
class TestCombatTagFlip:
    """Entering and ending initiative toggles combat-tag visibility on the
    composed top-level server.

    The flip helpers are module-level functions, so every test that flips
    into combat undoes the flip in a ``finally`` block. A failing assertion
    or a raising ``list_tools`` call therefore cannot leave combat tools
    visible for later tests.
    """

    def test_flip_into_combat_shows_combat_tools(self, ctx: ToolContext):
        """After flipping into combat, the combat-only tools are listed."""

        async def _run() -> set[str]:
            server = await _compose_server("dm")
            _flip_into_combat()
            try:
                return {t.name for t in await server.list_tools()}
            finally:
                # Cleanup runs before the assertions below, so it happens
                # even when one of them fails.
                _flip_out_of_combat()

        names = asyncio.run(_run())
        assert "next_turn" in names
        assert "add_combatant" in names
        assert "condition" in names

    def test_flip_out_of_combat_hides_combat_tools(self, ctx: ToolContext):
        """Flipping in and straight back out hides the combat-only tools."""

        async def _run() -> set[str]:
            server = await _compose_server("dm")
            _flip_into_combat()
            _flip_out_of_combat()
            return {t.name for t in await server.list_tools()}

        names = asyncio.run(_run())
        assert "next_turn" not in names
        assert "add_combatant" not in names

    def test_combat_control_stays_visible_through_cycle(self, ctx: ToolContext):
        """enter_initiative / end_initiative are tagged combat_control and
        must stay visible whether initiative is active or not."""

        async def _gather_combat_control() -> tuple[set[str], set[str], set[str]]:
            server = await _compose_server("dm")
            initial = {t.name for t in await server.list_tools()}
            _flip_into_combat()
            try:
                during = {t.name for t in await server.list_tools()}
            finally:
                # Always leave combat, even if listing tools raised.
                _flip_out_of_combat()
            after = {t.name for t in await server.list_tools()}
            return initial, during, after

        initial, during, after = asyncio.run(_gather_combat_control())
        for state in (initial, during, after):
            assert "enter_initiative" in state
            assert "end_initiative" in state
300
301
class TestAdvancementVisibility:
    """level_up is hidden until the character has advancement_ready set."""

    def test_level_up_hidden_at_compose_time(self, ctx: ToolContext):
        """Fresh compose should not expose level_up — nothing has granted it yet."""

        async def _gather() -> set[str]:
            server = await _compose_server("dm")
            return {t.name for t in await server.list_tools()}

        names = asyncio.run(_gather())
        assert "level_up" not in names

    def test_level_up_revealed_when_advancement_ready(self, ctx: ToolContext):
        """Setting advancement_ready on the character reveals level_up."""

        async def _run() -> set[str]:
            server = await _compose_server("dm")
            refresh_advancement_visibility({"advancement_ready": 4})
            try:
                return {t.name for t in await server.list_tools()}
            finally:
                # Reset in ``finally`` so a failed assertion or a raising
                # list_tools call cannot leave level_up revealed for
                # later tests.
                refresh_advancement_visibility(None)

        names = asyncio.run(_run())
        assert "level_up" in names

    def test_level_up_hidden_again_when_flag_cleared(self, ctx: ToolContext):
        """Clearing advancement_ready hides level_up again."""

        async def _run() -> set[str]:
            server = await _compose_server("dm")
            refresh_advancement_visibility({"advancement_ready": 4})
            refresh_advancement_visibility({"advancement_ready": None})
            return {t.name for t in await server.list_tools()}

        names = asyncio.run(_run())
        assert "level_up" not in names

    def test_level_up_not_in_planner_compose(self, ctx: ToolContext):
        """Only the DM role cares about advancement visibility. Other roles
        don't have level_up at all, so the flip is a no-op for them."""

        async def _run() -> set[str]:
            server = await _compose_server("planner")
            return {t.name for t in await server.list_tools()}

        names = asyncio.run(_run())
        assert "level_up" not in names

    def test_refresh_with_none_character_is_safe(self, ctx: ToolContext):
        """A character sheet that doesn't exist yet (pre-creation) should
        not crash the visibility flip."""

        async def _run() -> None:
            await _compose_server("dm")
            refresh_advancement_visibility(None)

        asyncio.run(_run())
355
356
class TestPopulateIndex:
    """Exercise the SRD-seeding helper without launching a real server.

    Every test hands ``_populate_index`` an explicit ``srd_root`` located
    under a tmp path so the real package rules directory isn't touched.
    """

    @staticmethod
    def _fake_index(*, srd_seeded: bool = False):
        """Build a MagicMock index whose ``has_source`` returns ``srd_seeded``."""
        from unittest.mock import MagicMock

        fake = MagicMock()
        fake.has_source.return_value = srd_seeded
        return fake

    def test_no_srd_no_world_dir(self, tmp_path):
        """With nothing on disk, the index is neither reseeded nor reindexed."""
        from storied.mcp_server import _populate_index

        index = self._fake_index()
        missing_world = tmp_path / "worlds" / "missing"
        missing_srd = tmp_path / "srd-missing"
        _populate_index(missing_world, index, srd_root=missing_srd)
        # No SRD seed, no SRD sections, no world dir → nothing should be called
        index.reseed.assert_not_called()
        index.reindex_directory.assert_not_called()

    def test_world_dir_only(self, tmp_path):
        """An existing world dir is reindexed with transcripts skipped."""
        from storied.mcp_server import _populate_index

        world = tmp_path / "worlds" / "test"
        world.mkdir(parents=True)
        index = self._fake_index()
        _populate_index(world, index, srd_root=tmp_path / "srd-missing")
        index.reindex_directory.assert_called_once_with(
            world,
            source="world",
            skip_subdirs=frozenset({"transcripts"}),
        )

    def test_srd_sections_dir(self, tmp_path):
        """An SRD ``sections`` directory without a seed db is reindexed."""
        from storied.mcp_server import _populate_index

        root = tmp_path / "srd-5.2.1"
        sections = root / "sections"
        sections.mkdir(parents=True)
        world = tmp_path / "worlds" / "test"
        index = self._fake_index()
        _populate_index(world, index, srd_root=root)
        # SRD sections present → reindex SRD; no user layer, no world dir
        assert index.reindex_directory.call_count == 1
        index.reindex_directory.assert_called_with(sections, source="srd")

    def test_user_layer_indexed(self, tmp_path):
        """When the user homebrew directory exists, _populate_index
        reindexes it with source='user' after the shipped SRD."""
        from unittest.mock import call

        from storied.mcp_server import _populate_index

        # The autouse fixture sets _user_rules_home = tmp_path / "rules".
        homebrew = tmp_path / "rules"
        homebrew.mkdir(parents=True)
        monsters = homebrew / "monsters"
        monsters.mkdir()
        (monsters / "homebrew.md").write_text("# Homebrew")

        world = tmp_path / "worlds" / "test"
        world.mkdir(parents=True)

        index = self._fake_index()
        _populate_index(world, index, srd_root=tmp_path / "srd-missing")
        # Should have indexed user and world, in that order
        expected_calls = [
            call(homebrew, source="user"),
            call(
                world,
                source="world",
                skip_subdirs=frozenset({"transcripts"}),
            ),
        ]
        assert index.reindex_directory.call_args_list == expected_calls

    def test_populate_index_is_idempotent(self, tmp_path):
        """Once the SRD is seeded, subsequent calls must not reseed
        (which would wipe world/transcript rows by file-copying the SRD
        db over the live one)."""
        from storied.mcp_server import _populate_index

        root = tmp_path / "srd-5.2.1"
        root.mkdir(parents=True)
        (root / "search.db").write_bytes(b"sqlite stub")
        world = tmp_path / "worlds" / "test"
        world.mkdir(parents=True)

        index = self._fake_index(srd_seeded=True)  # SRD already seeded
        _populate_index(world, index, srd_root=root)
        index.reseed.assert_not_called()

    def test_flip_helpers_no_op_when_root_unset(self):
        """The combat-tag flip helpers must not crash when no top-level
        server has been registered (e.g. when the combat module is imported
        in isolation by a test before any compose_server call)."""
        import storied.tools.combat as combat_mod

        # Snapshot the module state, then blank it for the duration of the test.
        previous_root = combat_mod._root
        previous_keys = combat_mod._combat_keys_to_hide
        combat_mod._root = None
        combat_mod._combat_keys_to_hide = set()
        try:
            combat_mod._flip_into_combat()  # should silently no-op
            combat_mod._flip_out_of_combat()
        finally:
            combat_mod._root = previous_root
            combat_mod._combat_keys_to_hide = previous_keys

    def test_srd_seed_db_takes_priority(self, tmp_path):
        """A seed db wins over the sections directory when both exist."""
        from storied.mcp_server import _populate_index

        root = tmp_path / "srd-5.2.1"
        root.mkdir(parents=True)
        seed = root / "search.db"
        seed.write_bytes(b"sqlite stub")
        # Also create the sections dir to verify the seed wins
        (root / "sections").mkdir()
        world = tmp_path / "worlds" / "test"
        index = self._fake_index()
        _populate_index(world, index, srd_root=root)
        index.reseed.assert_called_once_with(seed)
        # When the seed exists, we don't also reindex SRD sections
        index.reindex_directory.assert_not_called()
505
506
class TestRulesLookupRace:
    """Regression test for the transcript-upsert-vs-first-search race.

    Before the fix, the lazy ``on_empty`` seeding would miss its window
    whenever the DM's first turn didn't call ``recall``: the engine would
    upsert the transcript at turn end, making the db non-empty, and the
    next search would skip seeding because ``count > 0``. Rules lookups
    would then return nothing. The fix eagerly populates in
    ``start_server``, and ``_populate_index`` is idempotent via
    ``has_source("srd")``.

    Every ``VectorIndex`` opened here is closed in a ``finally`` block so
    a failing assertion does not leave an open handle on a db file under
    the temporary directory.
    """

    def test_srd_stays_available_after_transcript_upsert_race(
        self,
        tmp_path,
        monkeypatch,
    ):
        """SRD rows remain searchable after a transcript upsert precedes
        the first search."""
        from storied import paths
        from storied.mcp_server import _populate_index
        from storied.search import VectorIndex

        # Minimal shipped SRD seed the populate helper can copy from.
        srd_root = tmp_path / "shipped" / "srd-5.2.1"
        srd_root.mkdir(parents=True)
        seed_db = srd_root / "search.db"
        seed_index = VectorIndex(seed_db)
        try:
            seed_index.upsert(
                "srd:character-origins.md:0",
                "# Character Origins\n\n**Half-Elf** gets +2 Charisma.",
                {
                    "source": "srd",
                    "content_type": "rules",
                    "path": str(srd_root / "sections" / "character-origins.md"),
                    "title": "Character Origins",
                },
            )
        finally:
            seed_index.close()

        monkeypatch.setattr(
            paths,
            "shipped_rules_path",
            lambda: tmp_path / "shipped",
        )

        # NOTE(review): presumably world_path is redirected to a temporary
        # location by a shared fixture — confirm against conftest.
        world_dir = paths.world_path("default")
        world_dir.mkdir(parents=True, exist_ok=True)
        db_path = world_dir / "search.db"

        # Eager populate the way start_server now does.
        vi = VectorIndex(db_path)
        try:
            _populate_index(world_dir, vi)
            assert vi.has_source("srd")

            # Turn 1 ends without any recall — engine upserts the transcript.
            (world_dir / "transcripts").mkdir(exist_ok=True)
            day_path = world_dir / "transcripts" / "day+001.md"
            day_path.write_text("### Day 1\n\nHey, welcome.")
            vi.upsert(
                "transcript:transcripts/day+001.md:0",
                day_path.read_text(),
                {
                    "source": "transcript",
                    "content_type": "transcripts",
                    "path": str(day_path),
                    "title": "Day 1",
                    "game_day": 1,
                },
            )

            # Turn 2: the DM finally calls recall. SRD must still be there.
            hits = vi.search(
                "half-elf charisma",
                limit=3,
                source_filter=["srd", "user", "world"],
            )
            assert len(hits) >= 1
            assert any(h.source == "srd" for h in hits)
        finally:
            vi.close()

    def test_populate_is_idempotent_on_repeated_start_server(
        self,
        tmp_path,
        monkeypatch,
    ):
        """A second start_server (onboarding → play handoff) must not
        wipe the world/transcript rows by re-copying the SRD seed."""
        from storied import paths
        from storied.mcp_server import _populate_index
        from storied.search import VectorIndex

        srd_root = tmp_path / "shipped" / "srd-5.2.1"
        srd_root.mkdir(parents=True)
        seed_db = srd_root / "search.db"
        seed_index = VectorIndex(seed_db)
        try:
            seed_index.upsert(
                "srd:x.md:0",
                "# X",
                {"source": "srd", "path": "x.md"},
            )
        finally:
            seed_index.close()

        monkeypatch.setattr(
            paths,
            "shipped_rules_path",
            lambda: tmp_path / "shipped",
        )

        world_dir = paths.world_path("default")
        world_dir.mkdir(parents=True, exist_ok=True)
        db_path = world_dir / "search.db"

        vi = VectorIndex(db_path)
        try:
            _populate_index(world_dir, vi)

            # Upsert a transcript row to represent prior session state.
            vi.upsert(
                "transcript:transcripts/day+001.md:0",
                "turn content",
                {"source": "transcript", "path": "transcripts/day+001.md"},
            )
            assert vi.has_source("transcript")

            # Second populate — must not wipe transcripts via reseed.
            _populate_index(world_dir, vi)
            assert vi.has_source("srd")
            assert vi.has_source("transcript")
        finally:
            vi.close()