tests/test_names_engine.py at main

guid.foo / storied
fork
A 5e storytelling engine with an LLM DM
fork
storied / tests / test_names_engine.py
at main 305 lines 9.6 kB view raw
wrap content
Chris Guidry Tighten quality gates and refactor the test_execute_tool fixtures 14d ago
2d9d18ca
  1"""Tests for the storied.names phonotactic engine layer."""
  2
  3import pytest
  4
  5from storied.names.engine.clusters import has_forbidden_cluster, normalize_word
  6from storied.names.engine.generator import Engine
  7from storied.names.engine.inventory import PhonemeInventory, zipfian_weights
  8from storied.names.engine.rewrite import (
  9    apply_rules,
 10    capitalize_name,
 11    parse_rule,
 12)
 13from storied.names.engine.sonority import passes_sonority, sonority_class
 14from storied.names.engine.syllable import (
 15    parse_template,
 16    sample_syllable,
 17    sample_word,
 18)
 19
 20
 21class TestZipfianWeights:
 22    def test_empty(self):
 23        assert zipfian_weights(0) == []
 24
 25    def test_single(self):
 26        weights = zipfian_weights(1)
 27        assert weights == [1.0]
 28
 29    def test_normalized(self):
 30        weights = zipfian_weights(5)
 31        assert sum(weights) == pytest.approx(1.0)
 32
 33    def test_decreasing(self):
 34        weights = zipfian_weights(6)
 35        for i in range(len(weights) - 1):
 36            assert weights[i] > weights[i + 1]
 37
 38
 39class TestPhonemeInventory:
 40    @pytest.fixture
 41    def inventory(self) -> PhonemeInventory:
 42        return PhonemeInventory(
 43            name="test",
 44            consonants=["k", "t", "n", "m", "s", "r", "l"],
 45            vowels=["a", "e", "i", "o"],
 46            liquids=["r", "l"],
 47            nasals=["n", "m"],
 48        )
 49
 50    def test_classes_exposes_C_and_V(self, inventory: PhonemeInventory):
 51        classes = inventory.classes()
 52        assert classes["C"] == ["k", "t", "n", "m", "s", "r", "l"]
 53        assert classes["V"] == ["a", "e", "i", "o"]
 54
 55    def test_classes_uses_explicit_liquids(self, inventory: PhonemeInventory):
 56        assert inventory.classes()["L"] == ["r", "l"]
 57
 58    def test_classes_falls_back_to_heuristic_liquids(self):
 59        inv = PhonemeInventory(
 60            name="x",
 61            consonants=["t", "k", "l", "r", "m"],
 62            vowels=["a", "i"],
 63        )
 64        # No explicit liquids — heuristic should pick l and r
 65        liquids = inv.classes()["L"]
 66        assert "l" in liquids
 67        assert "r" in liquids
 68
 69    def test_sample_returns_phoneme(self, inventory: PhonemeInventory):
 70        import random
 71
 72        rng = random.Random(42)
 73        result = inventory.sample("V", rng)
 74        assert result in inventory.vowels
 75
 76    def test_sample_unknown_class_raises(self, inventory: PhonemeInventory):
 77        import random
 78
 79        rng = random.Random(42)
 80        with pytest.raises(ValueError, match="Z"):
 81            inventory.sample("Z", rng)
 82
 83    def test_trim_reduces_consonants(self, inventory: PhonemeInventory):
 84        import random
 85
 86        rng = random.Random(42)
 87        trimmed = inventory.trim(rng, drop_fraction=0.3)
 88        # 0.3 * 7 ≈ 2 dropped from 7
 89        assert len(trimmed.consonants) < len(inventory.consonants)
 90
 91    def test_trim_keeps_small_inventories(self):
 92        small = PhonemeInventory(
 93            name="tiny",
 94            consonants=["k", "n"],
 95            vowels=["a", "i"],
 96        )
 97        import random
 98
 99        rng = random.Random(42)
100        trimmed = small.trim(rng, drop_fraction=0.5)
101        assert trimmed.consonants == ["k", "n"]
102
103
class TestSyllableParser:
    """Tests for ``parse_template``: slot parsing and error reporting."""

    def test_simple_template(self):
        """"CV" parses to a required C slot followed by a V slot."""
        parsed = parse_template("CV")
        assert len(parsed) == 2
        first, second = parsed[0], parsed[1]
        assert first.klass == "C"
        assert not first.optional
        assert second.klass == "V"

    def test_optional_slot(self):
        """Parentheses mark only the wrapped slot as optional."""
        parsed = parse_template("(C)V")
        leading, trailing = parsed[0], parsed[1]
        assert leading.optional
        assert trailing.klass == "V"
        assert not trailing.optional

    def test_complex_template(self):
        """A four-slot template keeps class order and per-slot optionality."""
        parsed = parse_template("(C)(L)V(C)")
        assert len(parsed) == 4
        klasses = [slot.klass for slot in parsed]
        optional_flags = [slot.optional for slot in parsed]
        assert klasses == ["C", "L", "V", "C"]
        assert optional_flags == [True, True, False, True]

    def test_unclosed_paren_raises(self):
        """A missing closing paren is reported as a ValueError."""
        with pytest.raises(ValueError, match="Unclosed paren"):
            parse_template("(CV")

    def test_unknown_class_raises(self):
        """An unrecognised class letter is reported as a ValueError."""
        with pytest.raises(ValueError, match="Unknown class letter"):
            parse_template("CZV")

    def test_multi_letter_optional_raises(self):
        """More than one letter inside parentheses is reported as a ValueError."""
        with pytest.raises(ValueError, match="single class letters"):
            parse_template("(CV)")
135
136
class TestSampleSyllable:
    """Tests for ``sample_syllable`` against a small two-consonant inventory."""

    @pytest.fixture
    def inventory(self) -> PhonemeInventory:
        """Minimal inventory: consonants k/t and vowels a/i."""
        return PhonemeInventory(
            name="x",
            consonants=["k", "t"],
            vowels=["a", "i"],
        )

    def test_returns_phoneme_list(self, inventory: PhonemeInventory):
        """A "CV" template yields a list holding exactly two phonemes."""
        import random

        rng = random.Random(42)
        slots = parse_template("CV")
        result = sample_syllable(slots, inventory, rng)
        assert isinstance(result, list)
        assert len(result) == 2

    def test_optional_slot_sometimes_skipped(
        self,
        inventory: PhonemeInventory,
    ):
        """Over 50 seeded draws of "(C)V", both syllable lengths must occur."""
        import random

        rng = random.Random(42)
        slots = parse_template("(C)V")
        results = [sample_syllable(slots, inventory, rng) for _ in range(50)]
        lengths = {len(r) for r in results}
        # Require BOTH outcomes (optional C skipped -> 1, kept -> 2) and
        # nothing else. The previous `1 in lengths or 2 in lengths` passed
        # even when the optional slot was never, or always, skipped.
        assert lengths == {1, 2}
167
168
class TestSampleWord:
    """Tests for ``sample_word``."""

    def test_produces_phonemes(self):
        """Two "CV" syllables produce a word of four phonemes."""
        import random

        templates = [parse_template("CV")]
        inventory = PhonemeInventory(
            name="x",
            consonants=["k", "t", "n"],
            vowels=["a", "i", "o"],
        )
        phonemes = sample_word(
            templates,
            inventory,
            random.Random(42),
            syllable_count=(2, 2),
        )
        # syllable_count is pinned to (2, 2); each "CV" syllable has 2 phonemes.
        assert len(phonemes) == 4
182
183
class TestSonority:
    """Tests for ``sonority_class`` values and the ``passes_sonority`` check."""

    def test_vowel_class(self):
        """The vowels a and e are both in sonority class 8."""
        for vowel in ("a", "e"):
            assert sonority_class(vowel) == 8

    def test_stop_class(self):
        """The stop p is in sonority class 1."""
        rank = sonority_class("p")
        assert rank == 1

    def test_unknown_defaults_to_4(self):
        """An unrecognised phoneme string gets class 4."""
        rank = sonority_class("zzqq")
        assert rank == 4

    def test_passes_short_word(self):
        """A two-phoneme word passes the check."""
        word = ["k", "a"]
        assert passes_sonority(word)

    def test_passes_canonical_cv(self):
        """A plain consonant-vowel-consonant word passes the check."""
        word = ["k", "a", "t"]
        assert passes_sonority(word)

    def test_rejects_falling_onset(self):
        """A word opening with r followed by k is rejected."""
        # Word-initial /rk/ puts a liquid ahead of a stop, so sonority falls
        # before the vowel; the check is expected to reject that onset.
        word = ["r", "k", "a"]
        assert not passes_sonority(word)
205
206
class TestClusters:
    """Tests for ``normalize_word`` and ``has_forbidden_cluster``."""

    def test_normalize_joins(self):
        """Phonemes, including multi-letter ones, are concatenated in order."""
        joined = normalize_word(["k", "a", "th"])
        assert joined == "kath"

    def test_no_forbidden_passes(self):
        """With an empty forbidden list, nothing is flagged."""
        flagged = has_forbidden_cluster("kath", [])
        assert not flagged

    def test_substring_match(self):
        """An unanchored cluster is flagged when it occurs in the word."""
        flagged = has_forbidden_cluster("kathx", ["thx"])
        assert flagged

    def test_word_boundary_anchor_start(self):
        """A leading ``#`` restricts the cluster to the start of the word."""
        start_only = ["#sr"]
        assert has_forbidden_cluster("srak", start_only)
        assert not has_forbidden_cluster("aksra", start_only)

    def test_word_boundary_anchor_end(self):
        """A trailing ``#`` restricts the cluster to the end of the word."""
        end_only = ["tl#"]
        assert has_forbidden_cluster("katl", end_only)
        assert not has_forbidden_cluster("katla", end_only)
225
226
class TestRewrite:
    """Tests for ``parse_rule``, ``apply_rules`` and ``capitalize_name``."""

    def test_parse_simple_rule(self):
        """"k -> g" parses into a pattern for "k" and the replacement "g"."""
        pattern, replacement = parse_rule("k -> g")
        assert pattern.pattern == "k"
        assert replacement == "g"

    def test_apply_simple_substitution(self):
        """Every occurrence of the pattern is replaced."""
        rewritten = apply_rules("kakak", ["k -> g"])
        assert rewritten == "gagag"

    def test_apply_anchored_rule(self):
        """A ``$``-anchored rule with an empty replacement drops only the final h."""
        rewritten = apply_rules("hahah", ["h$ -> "])
        assert rewritten == "haha"

    def test_apply_multiple_rules_in_order(self):
        """Two rules given together are both applied to the word."""
        rules = ["th -> dh", "k -> g"]
        rewritten = apply_rules("kath", rules)
        assert rewritten == "gadh"

    def test_invalid_rule_skipped(self):
        """A rule string without an arrow is ignored; the valid rule still applies."""
        rules = ["no arrow", "k -> g"]
        rewritten = apply_rules("kath", rules)
        assert rewritten == "gath"

    def test_capitalize_name(self):
        """The first letter of a lowercase name is upper-cased."""
        capitalized = capitalize_name("aldric")
        assert capitalized == "Aldric"

    def test_capitalize_empty(self):
        """The empty string is returned unchanged."""
        capitalized = capitalize_name("")
        assert capitalized == ""
254
255
class TestEngine:
    """End-to-end tests for ``Engine.name`` and ``Engine.names``."""

    @pytest.fixture
    def engine(self) -> Engine:
        """Engine with two templates, one forbidden cluster and a k -> c rewrite."""
        phonemes = PhonemeInventory(
            name="test",
            consonants=["k", "t", "n", "m", "s", "l", "r"],
            vowels=["a", "e", "i", "o"],
            liquids=["l", "r"],
            nasals=["n", "m"],
        )
        return Engine(
            inventory=phonemes,
            templates=["(C)V(C)", "CV"],
            forbidden_clusters=["#sr"],
            rewrite_rules=["k -> c"],
            min_length=3,
            max_length=10,
        )

    def test_name_returns_string(self, engine: Engine):
        """A generated name is a str of at least three characters."""
        generated = engine.name(seed=42)
        assert isinstance(generated, str)
        assert len(generated) >= 3

    def test_name_is_deterministic_for_seed(self, engine: Engine):
        """Two calls with the same seed return the same name."""
        first = engine.name(seed=42)
        second = engine.name(seed=42)
        assert first == second

    def test_name_is_capitalized(self, engine: Engine):
        """The first character of a generated name is upper case."""
        generated = engine.name(seed=42)
        initial = generated[0]
        assert initial.isupper()

    def test_rewrite_rule_applied(self, engine: Engine):
        """No generated name contains a lowercase k."""
        # The fixture's "k -> c" rule should have rewritten every lowercase k.
        for generated in engine.names(count=20, seed=42):
            assert "k" not in generated

    def test_names_returns_distinct(self, engine: Engine):
        """A batch of ten names contains no duplicates."""
        batch = engine.names(count=10, seed=42)
        unique = set(batch)
        assert len(unique) == len(batch)

    def test_names_respects_count(self, engine: Engine):
        """Requesting five names returns five names."""
        batch = engine.names(count=5, seed=42)
        assert len(batch) == 5

    def test_names_within_length_envelope(self, engine: Engine):
        """Every name's length lies within the engine's min/max bounds."""
        for generated in engine.names(count=20, seed=42):
            size = len(generated)
            assert engine.min_length <= size <= engine.max_length
Configure Feed

Configure Feed