tests/test_mcp_server.py at main

guid.foo / storied
fork
A 5e storytelling engine with an LLM DM
fork
storied / tests / test_mcp_server.py
at main 631 lines 23 kB view raw
wrap content
Chris Guidry Tighten quality gates and refactor the test_execute_tool fixtures 15d ago
2d9d18ca
  1"""Tests for the FastMCP server composition.
  2
  3The orchestrator in storied.mcp_server builds a per-role top-level FastMCP
  4server by mounting the tools/*.py module-level FastMCP instances and
  5applying tag-based visibility filters. These tests verify the per-role
  6tool visibility plus the dynamic combat-tag flip when initiative starts
  7and ends.
  8"""
  9
 10import asyncio
 11
 12import pytest
 13
 14from storied.mcp_server import _compose_server
 15from storied.tools import ToolContext
 16from storied.tools.character import refresh_advancement_visibility
 17from storied.tools.combat import _flip_into_combat, _flip_out_of_combat
 18
 19
 20def _names(role: str) -> set[str]:
 21    async def _gather() -> set[str]:
 22        server = await _compose_server(role)
 23        return {t.name for t in await server.list_tools()}
 24
 25    return asyncio.run(_gather())
 26
 27
 28class TestPerRoleComposition:
 29    """Each role sees only the tools tagged for it."""
 30
 31    def test_dm_includes_core_narrative_tools(self):
 32        names = _names("dm")
 33        assert "set_scene" in names
 34        assert "establish" in names
 35        assert "mark" in names
 36        assert "end_session" in names
 37        assert "recall" in names
 38        assert "roll" in names
 39        assert "run_code" in names
 40
 41    def test_dm_includes_character_tools(self):
 42        names = _names("dm")
 43        for tool_name in (
 44            "damage",
 45            "heal",
 46            "adjust_coins",
 47            "add_effect",
 48            "remove_effect",
 49            "add_condition",
 50            "remove_condition",
 51            "add_item",
 52            "remove_item",
 53            "set_item_status",
 54            "adjust_resource",
 55            "rest",
 56            "add_note",
 57            "update_character",
 58            "create_character",
 59        ):
 60            assert tool_name in names, f"missing {tool_name}"
 61
 62    def test_dm_does_not_include_removed_tools(self):
 63        """break_concentration, use_resource, and restore_resource were
 64        folded into other tools — ensure they don't resurface."""
 65        names = _names("dm")
 66        assert "break_concentration" not in names
 67        assert "use_resource" not in names
 68        assert "restore_resource" not in names
 69
 70    def test_dm_initial_excludes_combat_tools(self):
 71        """In DM mode, combat tools are hidden until enter_initiative runs."""
 72        names = _names("dm")
 73        assert "next_turn" not in names
 74        assert "add_combatant" not in names
 75        assert "remove_combatant" not in names
 76        assert "condition" not in names
 77
 78    def test_dm_initial_keeps_combat_control(self):
 79        """enter_initiative and end_initiative stay visible so combat can begin."""
 80        names = _names("dm")
 81        assert "enter_initiative" in names
 82        assert "end_initiative" in names
 83
 84    def test_planner_only_has_its_tools(self):
 85        assert _names("planner") == {
 86            "establish",
 87            "mark",
 88            "amend_mark",
 89            "notify_dm",
 90            "recall",
 91            "forge_culture",
 92            "generate_names",
 93        }
 94
 95    def test_seeder_only_has_its_tools(self):
 96        assert _names("seeder") == {
 97            "establish",
 98            "set_scene",
 99            "forge_culture",
100            "generate_names",
101        }
102
103    def test_advancement_only_has_its_tools(self):
104        assert _names("advancement") == {"notify_dm", "recall", "update_character"}
105
106    def test_arc_architect_only_has_its_tools(self):
107        # The arc architect's whole job is critique-and-commit. Nothing
108        # else. World-building tools must not leak into this role.
109        assert _names("arc_architect") == {"commit_arc", "recall"}
110
111    def test_arc_architect_in_all_roles(self):
112        from storied.mcp_server import ALL_ROLES
113
114        assert "arc_architect" in ALL_ROLES
115
116
class TestToolSchemas:
    """Verify tool input schemas expose nested field shapes to the LLM.

    These tests guard against regression to bare `dict` / `list` parameter
    types, which leave the LLM with no guidance about which keys are required.
    """

    def _schema(self, tool_name: str) -> dict:
        """Return the parameter schema of ``tool_name`` on the composed DM server.

        Raises:
            AssertionError: if the DM server lists no tool with that name.
        """

        async def _gather() -> dict:
            server = await _compose_server("dm")
            for t in await server.list_tools():
                if t.name == tool_name:
                    return t.parameters
            raise AssertionError(f"tool {tool_name!r} not found")

        return asyncio.run(_gather())

    def test_enter_initiative_documents_combatant_shape(self):
        schema = self._schema("enter_initiative")
        item_schema = schema["properties"]["combatants"]["items"]
        required = set(item_schema["required"])
        assert {"name", "initiative", "hp", "hp_max", "ac"} <= required, (
            f"enter_initiative must require all combatant fields, got {required}"
        )
        # is_player is optional but documented
        assert "is_player" in item_schema["properties"]

    def test_create_character_documents_ability_keys(self):
        schema = self._schema("create_character")
        ability_props = schema["properties"]["abilities"]["properties"]
        for ability in (
            "strength",
            "dexterity",
            "constitution",
            "intelligence",
            "wisdom",
            "charisma",
        ):
            assert ability in ability_props, (
                f"create_character must document the {ability} ability score"
            )

    def test_adjust_coins_documents_denominations(self):
        schema = self._schema("adjust_coins")
        delta_props = schema["properties"]["deltas"]["properties"]
        for denom in ("cp", "sp", "ep", "gp", "pp"):
            assert denom in delta_props, (
                f"adjust_coins must document the {denom} denomination"
            )

    def test_create_character_purse_documents_denominations(self):
        schema = self._schema("create_character")
        purse = schema["properties"]["purse"]
        # Purse is wrapped in anyOf for the | None. Also accept a bare object
        # schema, mirroring how the enum test unwraps anyOf only when needed.
        candidates = [purse, *purse.get("anyOf", [])]
        object_option = next(
            (opt for opt in candidates if opt.get("type") == "object"),
            None,
        )
        # Fail with a readable message instead of a bare StopIteration/KeyError
        # when the purse stops being described as an object.
        assert object_option is not None, (
            f"create_character.purse should expose an object schema, got {purse}"
        )
        purse_props = object_option["properties"]
        for denom in ("cp", "sp", "ep", "gp", "pp"):
            assert denom in purse_props, (
                f"create_character.purse must document the {denom} denomination"
            )

    @pytest.mark.parametrize(
        ("tool_name", "param", "expected_values"),
        [
            ("rest", "type", {"short", "long"}),
            ("set_item_status", "status", {"attuned", "equipped", "carried"}),
            ("recall", "scope", {"rules", "world", "all"}),
            (
                "establish",
                "entity_type",
                {
                    "npcs",
                    "locations",
                    "items",
                    "factions",
                    "threads",
                    "lore",
                    "maps",
                    "cultures",
                },
            ),
            (
                "mark",
                "entity_type",
                {
                    "npcs",
                    "locations",
                    "items",
                    "factions",
                    "threads",
                    "maps",
                    "cultures",
                },
            ),
            (
                "note_discovery",
                "content_type",
                {"npcs", "locations", "factions", "lore", "cultures"},
            ),
        ],
    )
    def test_enum_parameters_expose_valid_values(
        self,
        tool_name: str,
        param: str,
        expected_values: set[str],
    ):
        """Each conceptually-enum parameter must surface as a JSON Schema
        enum, not a free-form string. Guards against regression to bare `str`."""
        schema = self._schema(tool_name)
        prop = schema["properties"][param]
        # Default values wrap the enum in anyOf for `Type | None`; unwrap if needed
        enum_values = prop.get("enum")
        if enum_values is None and "anyOf" in prop:
            for opt in prop["anyOf"]:
                if "enum" in opt:
                    enum_values = opt["enum"]
                    break
        assert enum_values is not None, (
            f"{tool_name}.{param} should expose an enum, got {prop}"
        )
        assert set(enum_values) == expected_values

    def test_combat_condition_enum_parameters(self):
        """The combat `condition` tool is hidden in the default DM compose
        (combat-only), so check it directly on the combat module."""
        from storied.tools.combat import mcp as combat_mcp

        async def _gather() -> dict:
            for t in await combat_mcp.list_tools():
                if t.name == "condition":
                    return t.parameters
            raise AssertionError("condition tool not found")

        params = asyncio.run(_gather())
        assert set(params["properties"]["action"]["enum"]) == {"add", "remove"}
        assert set(params["properties"]["ends_on"]["enum"]) == {"start", "end"}
253
254
class TestCombatTagFlip:
    """Entering and ending initiative toggles combat-tag visibility on the
    composed top-level server.

    The flip helpers act on module-level state in ``storied.tools.combat``,
    so each test that flips into combat restores the out-of-combat state in a
    ``finally`` block; a failure must not leak combat visibility into the
    tests that run afterwards.
    """

    def test_flip_into_combat_shows_combat_tools(self, ctx: ToolContext):
        async def _run() -> set[str]:
            server = await _compose_server("dm")
            _flip_into_combat()
            try:
                return {t.name for t in await server.list_tools()}
            finally:
                # Cleanup happens even when listing raises, and before the
                # assertions below get a chance to fail.
                _flip_out_of_combat()

        names = asyncio.run(_run())
        assert "next_turn" in names
        assert "add_combatant" in names
        assert "condition" in names

    def test_flip_out_of_combat_hides_combat_tools(self, ctx: ToolContext):
        async def _run() -> set[str]:
            server = await _compose_server("dm")
            _flip_into_combat()
            _flip_out_of_combat()
            return {t.name for t in await server.list_tools()}

        names = asyncio.run(_run())
        assert "next_turn" not in names
        assert "add_combatant" not in names

    def test_combat_control_stays_visible_through_cycle(self, ctx: ToolContext):
        """enter_initiative / end_initiative are tagged combat_control and
        must stay visible whether initiative is active or not."""

        async def _gather_combat_control() -> tuple[set[str], set[str], set[str]]:
            server = await _compose_server("dm")
            initial = {t.name for t in await server.list_tools()}
            _flip_into_combat()
            try:
                during = {t.name for t in await server.list_tools()}
            finally:
                # Always leave combat, even if listing tools mid-combat fails.
                _flip_out_of_combat()
            after = {t.name for t in await server.list_tools()}
            return initial, during, after

        initial, during, after = asyncio.run(_gather_combat_control())
        for state in (initial, during, after):
            assert "enter_initiative" in state
            assert "end_initiative" in state
300
301
class TestAdvancementVisibility:
    """level_up is hidden until the character has advancement_ready set."""

    def test_level_up_hidden_at_compose_time(self, ctx: ToolContext):
        """Fresh compose should not expose level_up — nothing has granted it yet."""
        # _names composes a fresh server for the role and lists its tools.
        assert "level_up" not in _names("dm")

    def test_level_up_revealed_when_advancement_ready(self, ctx: ToolContext):
        async def _run() -> set[str]:
            server = await _compose_server("dm")
            refresh_advancement_visibility({"advancement_ready": 4})
            try:
                return {t.name for t in await server.list_tools()}
            finally:
                # Cleanup must run even if listing raises, and must not depend
                # on the assertion below passing, or level_up visibility would
                # leak into later tests.
                refresh_advancement_visibility(None)

        names = asyncio.run(_run())
        assert "level_up" in names

    def test_level_up_hidden_again_when_flag_cleared(self, ctx: ToolContext):
        async def _run() -> set[str]:
            server = await _compose_server("dm")
            refresh_advancement_visibility({"advancement_ready": 4})
            refresh_advancement_visibility({"advancement_ready": None})
            return {t.name for t in await server.list_tools()}

        names = asyncio.run(_run())
        assert "level_up" not in names

    def test_level_up_not_in_planner_compose(self, ctx: ToolContext):
        """Only the DM role cares about advancement visibility. Other roles
        don't have level_up at all, so the flip is a no-op for them."""
        assert "level_up" not in _names("planner")

    def test_refresh_with_none_character_is_safe(self, ctx: ToolContext):
        """A character sheet that doesn't exist yet (pre-creation) should
        not crash the visibility flip."""

        async def _run() -> None:
            await _compose_server("dm")
            refresh_advancement_visibility(None)

        # Passing means no exception escaped; there is nothing to assert on.
        asyncio.run(_run())
355
356
class TestPopulateIndex:
    """Cover the SRD-seeding helper without launching a real server.

    Tests pass an explicit ``srd_root`` pointed at a tmp path so the
    real package rules directory isn't touched.
    """

    @staticmethod
    def _index_double(*, srd_seeded: bool = False):
        """Build a MagicMock index whose ``has_source`` returns ``srd_seeded``."""
        from unittest.mock import MagicMock

        double = MagicMock()
        double.has_source.return_value = srd_seeded
        return double

    def test_no_srd_no_world_dir(self, tmp_path):
        from storied.mcp_server import _populate_index

        index = self._index_double()
        missing_world = tmp_path / "worlds" / "missing"
        missing_srd = tmp_path / "srd-missing"
        _populate_index(missing_world, index, srd_root=missing_srd)
        # No SRD seed, no SRD sections, no world dir → nothing should be called
        index.reseed.assert_not_called()
        index.reindex_directory.assert_not_called()

    def test_world_dir_only(self, tmp_path):
        from storied.mcp_server import _populate_index

        world = tmp_path / "worlds" / "test"
        world.mkdir(parents=True)
        index = self._index_double()
        _populate_index(world, index, srd_root=tmp_path / "srd-missing")
        index.reindex_directory.assert_called_once_with(
            world,
            source="world",
            skip_subdirs=frozenset({"transcripts"}),
        )

    def test_srd_sections_dir(self, tmp_path):
        from storied.mcp_server import _populate_index

        srd_root = tmp_path / "srd-5.2.1"
        sections = srd_root / "sections"
        sections.mkdir(parents=True)
        world = tmp_path / "worlds" / "test"
        index = self._index_double()
        _populate_index(world, index, srd_root=srd_root)
        # SRD sections present → reindex SRD; no user layer, no world dir
        index.reindex_directory.assert_called_once_with(sections, source="srd")

    def test_user_layer_indexed(self, tmp_path):
        """When the user homebrew directory exists, _populate_index
        reindexes it with source='user' after the shipped SRD."""
        from unittest.mock import call

        from storied.mcp_server import _populate_index

        # The autouse fixture sets _user_rules_home = tmp_path / "rules".
        homebrew = tmp_path / "rules"
        homebrew.mkdir(parents=True)
        monsters = homebrew / "monsters"
        monsters.mkdir()
        (monsters / "homebrew.md").write_text("# Homebrew")

        world = tmp_path / "worlds" / "test"
        world.mkdir(parents=True)

        index = self._index_double()
        _populate_index(world, index, srd_root=tmp_path / "srd-missing")
        # Should have indexed user and world, in that order
        expected_calls = [
            call(homebrew, source="user"),
            call(
                world,
                source="world",
                skip_subdirs=frozenset({"transcripts"}),
            ),
        ]
        assert index.reindex_directory.call_args_list == expected_calls

    def test_populate_index_is_idempotent(self, tmp_path):
        """Once the SRD is seeded, subsequent calls must not reseed
        (which would wipe world/transcript rows by file-copying the SRD
        db over the live one)."""
        from storied.mcp_server import _populate_index

        srd_root = tmp_path / "srd-5.2.1"
        srd_root.mkdir(parents=True)
        (srd_root / "search.db").write_bytes(b"sqlite stub")
        world = tmp_path / "worlds" / "test"
        world.mkdir(parents=True)

        index = self._index_double(srd_seeded=True)  # SRD already seeded
        _populate_index(world, index, srd_root=srd_root)
        index.reseed.assert_not_called()

    def test_flip_helpers_no_op_when_root_unset(self):
        """The combat-tag flip helpers must not crash when no top-level
        server has been registered (e.g. when the combat module is imported
        in isolation by a test before any compose_server call)."""
        # NOTE(review): this exercises storied.tools.combat, not
        # _populate_index — it may belong with the combat flip tests.
        import storied.tools.combat as combat

        # Snapshot and clear the module state for the duration of the test
        original_root = combat._root
        original_keys = combat._combat_keys_to_hide
        combat._root = None
        combat._combat_keys_to_hide = set()
        try:
            combat._flip_into_combat()  # should silently no-op
            combat._flip_out_of_combat()
        finally:
            combat._root = original_root
            combat._combat_keys_to_hide = original_keys

    def test_srd_seed_db_takes_priority(self, tmp_path):
        from storied.mcp_server import _populate_index

        srd_root = tmp_path / "srd-5.2.1"
        srd_root.mkdir(parents=True)
        seed_db = srd_root / "search.db"
        seed_db.write_bytes(b"sqlite stub")
        # Also create the sections dir to verify the seed wins
        (srd_root / "sections").mkdir()
        world = tmp_path / "worlds" / "test"
        index = self._index_double()
        _populate_index(world, index, srd_root=srd_root)
        index.reseed.assert_called_once_with(seed_db)
        # When the seed exists, we don't also reindex SRD sections
        index.reindex_directory.assert_not_called()
505
506
class TestRulesLookupRace:
    """Regression test for the transcript-upsert-vs-first-search race.

    Before the fix, the lazy ``on_empty`` seeding would miss its window
    whenever the DM's first turn didn't call ``recall``: the engine would
    upsert the transcript at turn end, making the db non-empty, and the
    next search would skip seeding because ``count > 0``. Rules lookups
    would then return nothing. The fix eagerly populates in
    ``start_server``, and ``_populate_index`` is idempotent via
    ``has_source("srd")``.

    Every ``VectorIndex`` opened here is closed in a ``finally`` block so a
    failing assertion does not leave a database handle open on ``tmp_path``.
    """

    def test_srd_stays_available_after_transcript_upsert_race(
        self,
        tmp_path,
        monkeypatch,
    ):
        from storied import paths
        from storied.mcp_server import _populate_index
        from storied.search import VectorIndex

        # Minimal shipped SRD seed the populate helper can copy from.
        srd_root = tmp_path / "shipped" / "srd-5.2.1"
        srd_root.mkdir(parents=True)
        seed_db = srd_root / "search.db"
        seed_index = VectorIndex(seed_db)
        try:
            seed_index.upsert(
                "srd:character-origins.md:0",
                "# Character Origins\n\n**Half-Elf** gets +2 Charisma.",
                {
                    "source": "srd",
                    "content_type": "rules",
                    "path": str(srd_root / "sections" / "character-origins.md"),
                    "title": "Character Origins",
                },
            )
        finally:
            seed_index.close()

        monkeypatch.setattr(
            paths,
            "shipped_rules_path",
            lambda: tmp_path / "shipped",
        )

        world_dir = paths.world_path("default")
        world_dir.mkdir(parents=True, exist_ok=True)
        db_path = world_dir / "search.db"

        # Eager populate the way start_server now does.
        vi = VectorIndex(db_path)
        try:
            _populate_index(world_dir, vi)
            assert vi.has_source("srd")

            # Turn 1 ends without any recall — engine upserts the transcript.
            (world_dir / "transcripts").mkdir(exist_ok=True)
            day_path = world_dir / "transcripts" / "day+001.md"
            # Explicit encoding keeps the round-trip independent of the locale.
            day_path.write_text("### Day 1\n\nHey, welcome.", encoding="utf-8")
            vi.upsert(
                "transcript:transcripts/day+001.md:0",
                day_path.read_text(encoding="utf-8"),
                {
                    "source": "transcript",
                    "content_type": "transcripts",
                    "path": str(day_path),
                    "title": "Day 1",
                    "game_day": 1,
                },
            )

            # Turn 2: the DM finally calls recall. SRD must still be there.
            hits = vi.search(
                "half-elf charisma",
                limit=3,
                source_filter=["srd", "user", "world"],
            )
            assert len(hits) >= 1
            assert any(h.source == "srd" for h in hits)
        finally:
            vi.close()

    def test_populate_is_idempotent_on_repeated_start_server(
        self,
        tmp_path,
        monkeypatch,
    ):
        """A second start_server (onboarding → play handoff) must not
        wipe the world/transcript rows by re-copying the SRD seed."""
        from storied import paths
        from storied.mcp_server import _populate_index
        from storied.search import VectorIndex

        srd_root = tmp_path / "shipped" / "srd-5.2.1"
        srd_root.mkdir(parents=True)
        seed_db = srd_root / "search.db"
        seed_index = VectorIndex(seed_db)
        try:
            seed_index.upsert(
                "srd:x.md:0",
                "# X",
                {"source": "srd", "path": "x.md"},
            )
        finally:
            seed_index.close()

        monkeypatch.setattr(
            paths,
            "shipped_rules_path",
            lambda: tmp_path / "shipped",
        )

        world_dir = paths.world_path("default")
        world_dir.mkdir(parents=True, exist_ok=True)
        db_path = world_dir / "search.db"

        vi = VectorIndex(db_path)
        try:
            _populate_index(world_dir, vi)

            # Upsert a transcript row to represent prior session state.
            vi.upsert(
                "transcript:transcripts/day+001.md:0",
                "turn content",
                {"source": "transcript", "path": "transcripts/day+001.md"},
            )
            assert vi.has_source("transcript")

            # Second populate — must not wipe transcripts via reseed.
            _populate_index(world_dir, vi)
            assert vi.has_source("srd")
            assert vi.has_source("transcript")
        finally:
            vi.close()
Configure Feed

Configure Feed