A 5e storytelling engine with an LLM DM
1"""Tests for the storied.names phonotactic engine layer."""
2
3import pytest
4
5from storied.names.engine.clusters import has_forbidden_cluster, normalize_word
6from storied.names.engine.generator import Engine
7from storied.names.engine.inventory import PhonemeInventory, zipfian_weights
8from storied.names.engine.rewrite import (
9 apply_rules,
10 capitalize_name,
11 parse_rule,
12)
13from storied.names.engine.sonority import passes_sonority, sonority_class
14from storied.names.engine.syllable import (
15 parse_template,
16 sample_syllable,
17 sample_word,
18)
19
20
class TestZipfianWeights:
    """Checks on the weight lists returned by ``zipfian_weights``."""

    def test_empty(self):
        """Requesting zero weights yields an empty list."""
        assert zipfian_weights(0) == []

    def test_single(self):
        """A single requested weight comes back as exactly ``[1.0]``."""
        assert zipfian_weights(1) == [1.0]

    def test_normalized(self):
        """Five weights sum to approximately one."""
        total = sum(zipfian_weights(5))
        assert total == pytest.approx(1.0)

    def test_decreasing(self):
        """Each of six weights is strictly greater than the one after it."""
        weights = zipfian_weights(6)
        # Walk adjacent pairs rather than indexing by position.
        for earlier, later in zip(weights, weights[1:]):
            assert earlier > later
37
38
class TestPhonemeInventory:
    """Tests for ``PhonemeInventory``: class lookup, sampling, and trimming."""

    @pytest.fixture
    def inventory(self) -> PhonemeInventory:
        """Inventory with seven consonants, four vowels, explicit liquids and nasals."""
        return PhonemeInventory(
            name="test",
            consonants=["k", "t", "n", "m", "s", "r", "l"],
            vowels=["a", "e", "i", "o"],
            liquids=["r", "l"],
            nasals=["n", "m"],
        )

    def test_classes_exposes_C_and_V(self, inventory: PhonemeInventory):
        """``classes()`` maps "C" and "V" to the configured phoneme lists."""
        class_map = inventory.classes()
        expected = {
            "C": ["k", "t", "n", "m", "s", "r", "l"],
            "V": ["a", "e", "i", "o"],
        }
        for letter, phonemes in expected.items():
            assert class_map[letter] == phonemes

    def test_classes_uses_explicit_liquids(self, inventory: PhonemeInventory):
        """The "L" class equals the liquids passed to the constructor."""
        liquid_class = inventory.classes()["L"]
        assert liquid_class == ["r", "l"]

    def test_classes_falls_back_to_heuristic_liquids(self):
        """With no liquids given, the "L" class is still expected to hold l and r."""
        bare = PhonemeInventory(
            name="x",
            consonants=["t", "k", "l", "r", "m"],
            vowels=["a", "i"],
        )
        # No explicit liquids were supplied, so "L" must come from a fallback.
        fallback = bare.classes()["L"]
        for liquid in ("l", "r"):
            assert liquid in fallback

    def test_sample_returns_phoneme(self, inventory: PhonemeInventory):
        """Sampling class "V" returns one of the inventory's vowels."""
        import random

        picked = inventory.sample("V", random.Random(42))
        assert picked in inventory.vowels

    def test_sample_unknown_class_raises(self, inventory: PhonemeInventory):
        """Sampling an unknown class raises ``ValueError`` mentioning the letter."""
        import random

        seeded = random.Random(42)
        with pytest.raises(ValueError, match="Z"):
            inventory.sample("Z", seeded)

    def test_trim_reduces_consonants(self, inventory: PhonemeInventory):
        """Trimming with ``drop_fraction=0.3`` leaves fewer consonants than before."""
        import random

        seeded = random.Random(42)
        trimmed = inventory.trim(seeded, drop_fraction=0.3)
        # 0.3 * 7 is roughly 2, so presumably about two of the seven are dropped;
        # the assertion only requires that the count shrinks.
        assert len(inventory.consonants) > len(trimmed.consonants)

    def test_trim_keeps_small_inventories(self):
        """A two-consonant inventory comes back from ``trim`` unchanged."""
        import random

        tiny = PhonemeInventory(
            name="tiny",
            consonants=["k", "n"],
            vowels=["a", "i"],
        )
        trimmed = tiny.trim(random.Random(42), drop_fraction=0.5)
        assert trimmed.consonants == ["k", "n"]
102
103
class TestSyllableParser:
    """Tests for ``parse_template``: slot parsing and its error cases."""

    def test_simple_template(self):
        """"CV" parses into a required C slot followed by a V slot."""
        parsed = parse_template("CV")
        assert len(parsed) == 2
        first, second = parsed[0], parsed[1]
        assert first.klass == "C"
        assert not first.optional
        assert second.klass == "V"

    def test_optional_slot(self):
        """Parentheses mark a slot optional; the bare V after them is required."""
        parsed = parse_template("(C)V")
        leading = parsed[0]
        following = parsed[1]
        assert leading.optional
        assert following.klass == "V"
        assert not following.optional

    def test_complex_template(self):
        """A four-slot template keeps class order and per-slot optionality."""
        parsed = parse_template("(C)(L)V(C)")
        assert len(parsed) == 4
        klasses = [slot.klass for slot in parsed]
        optional_flags = [slot.optional for slot in parsed]
        assert klasses == ["C", "L", "V", "C"]
        assert optional_flags == [True, True, False, True]

    def test_unclosed_paren_raises(self):
        """An unterminated "(" raises ``ValueError`` about the unclosed paren."""
        with pytest.raises(ValueError, match="Unclosed paren"):
            parse_template("(CV")

    def test_unknown_class_raises(self):
        """A letter that is not a known class raises ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown class letter"):
            parse_template("CZV")

    def test_multi_letter_optional_raises(self):
        """An optional group holding more than one letter raises ``ValueError``."""
        with pytest.raises(ValueError, match="single class letters"):
            parse_template("(CV)")
135
136
class TestSampleSyllable:
    """Tests for ``sample_syllable`` over parsed templates."""

    @pytest.fixture
    def inventory(self) -> PhonemeInventory:
        """Minimal inventory with two consonants and two vowels."""
        return PhonemeInventory(
            name="x",
            consonants=["k", "t"],
            vowels=["a", "i"],
        )

    def test_returns_phoneme_list(self, inventory: PhonemeInventory):
        """A "CV" template yields a list with one phoneme per slot."""
        import random

        rng = random.Random(42)
        slots = parse_template("CV")
        result = sample_syllable(slots, inventory, rng)
        assert isinstance(result, list)
        assert len(result) == 2

    def test_optional_slot_sometimes_skipped(
        self,
        inventory: PhonemeInventory,
    ):
        """Over 50 seeded draws of "(C)V", the optional slot is both kept and skipped."""
        import random

        rng = random.Random(42)
        slots = parse_template("(C)V")
        lengths = {
            len(sample_syllable(slots, inventory, rng)) for _ in range(50)
        }
        # Both outcomes must occur: length 1 (optional C skipped) and length 2
        # (optional C filled). The earlier `1 in lengths or 2 in lengths` check
        # passed even when the slot was never or always skipped.
        # NOTE(review): assumes the skip probability makes both outcomes appear
        # within 50 draws at seed 42 -- confirm against sample_syllable.
        assert lengths == {1, 2}
167
168
class TestSampleWord:
    """Tests for ``sample_word``."""

    def test_produces_phonemes(self):
        """Two "CV" syllables produce a word of four phonemes."""
        import random

        inventory = PhonemeInventory(
            name="x",
            consonants=["k", "t", "n"],
            vowels=["a", "i", "o"],
        )
        templates = [parse_template("CV")]
        phonemes = sample_word(
            templates,
            inventory,
            random.Random(42),
            syllable_count=(2, 2),
        )
        # 2 syllables × 2 phonemes each
        assert len(phonemes) == 4
182
183
class TestSonority:
    """Tests for ``sonority_class`` values and the ``passes_sonority`` check."""

    def test_vowel_class(self):
        """The vowels "a" and "e" both map to sonority class 8."""
        for vowel in ("a", "e"):
            assert sonority_class(vowel) == 8

    def test_stop_class(self):
        """The stop "p" maps to sonority class 1."""
        rank = sonority_class("p")
        assert rank == 1

    def test_unknown_defaults_to_4(self):
        """An unrecognised phoneme string maps to class 4."""
        rank = sonority_class("zzqq")
        assert rank == 4

    def test_passes_short_word(self):
        """A two-phoneme word is accepted."""
        word = ["k", "a"]
        assert passes_sonority(word)

    def test_passes_canonical_cv(self):
        """A consonant-vowel-consonant word is accepted."""
        word = ["k", "a", "t"]
        assert passes_sonority(word)

    def test_rejects_falling_onset(self):
        """A word opening with /rk/ before the vowel is rejected."""
        # /rk/ at word start: liquid (sonority 6) before stop (1).
        # Sonority falls across the onset before the vowel, which the
        # soft check is expected to catch.
        verdict = passes_sonority(["r", "k", "a"])
        assert not verdict
205
206
class TestClusters:
    """Tests for ``normalize_word`` and ``has_forbidden_cluster``."""

    def test_normalize_joins(self):
        """Phonemes, including a multi-letter one, are joined into one string."""
        joined = normalize_word(["k", "a", "th"])
        assert joined == "kath"

    def test_no_forbidden_passes(self):
        """With no forbidden clusters listed, nothing is flagged."""
        flagged = has_forbidden_cluster("kath", [])
        assert not flagged

    def test_substring_match(self):
        """An unanchored cluster is flagged where it appears inside the word."""
        flagged = has_forbidden_cluster("kathx", ["thx"])
        assert flagged

    def test_word_boundary_anchor_start(self):
        """A leading "#" restricts the cluster to the start of the word."""
        # "#sr" means /sr/ only at word start.
        cases = (("srak", True), ("aksra", False))
        for word, expected in cases:
            assert bool(has_forbidden_cluster(word, ["#sr"])) is expected

    def test_word_boundary_anchor_end(self):
        """A trailing "#" restricts the cluster to the end of the word."""
        cases = (("katl", True), ("katla", False))
        for word, expected in cases:
            assert bool(has_forbidden_cluster(word, ["tl#"])) is expected
225
226
class TestRewrite:
    """Tests for ``parse_rule``, ``apply_rules`` and ``capitalize_name``."""

    def test_parse_simple_rule(self):
        """"k -> g" parses into a pattern for "k" and the replacement "g"."""
        compiled, replacement = parse_rule("k -> g")
        assert (compiled.pattern, replacement) == ("k", "g")

    def test_apply_simple_substitution(self):
        """A plain rule replaces every occurrence in the word."""
        assert apply_rules("kakak", ["k -> g"]) == "gagag"

    def test_apply_anchored_rule(self):
        """A "$"-anchored rule with an empty replacement drops only the final "h"."""
        rewritten = apply_rules("hahah", ["h$ -> "])
        assert rewritten == "haha"

    def test_apply_multiple_rules_in_order(self):
        """Two rules given together are both reflected in the result."""
        rules = ["th -> dh", "k -> g"]
        assert apply_rules("kath", rules) == "gadh"

    def test_invalid_rule_skipped(self):
        """A rule string without an arrow is ignored; the valid rule still applies."""
        rules = ["no arrow", "k -> g"]
        assert apply_rules("kath", rules) == "gath"

    def test_capitalize_name(self):
        """A lowercase name gets an uppercase first letter."""
        capitalized = capitalize_name("aldric")
        assert capitalized == "Aldric"

    def test_capitalize_empty(self):
        """The empty string is returned unchanged."""
        capitalized = capitalize_name("")
        assert capitalized == ""
254
255
class TestEngine:
    """End-to-end tests for ``Engine.name`` and ``Engine.names``."""

    @pytest.fixture
    def engine(self) -> Engine:
        """Engine with two templates, one forbidden cluster and one rewrite rule."""
        phonemes = PhonemeInventory(
            name="test",
            consonants=["k", "t", "n", "m", "s", "l", "r"],
            vowels=["a", "e", "i", "o"],
            liquids=["l", "r"],
            nasals=["n", "m"],
        )
        return Engine(
            inventory=phonemes,
            templates=["(C)V(C)", "CV"],
            forbidden_clusters=["#sr"],
            rewrite_rules=["k -> c"],
            min_length=3,
            max_length=10,
        )

    def test_name_returns_string(self, engine: Engine):
        """``name`` returns a ``str`` of at least three characters."""
        generated = engine.name(seed=42)
        assert isinstance(generated, str)
        assert len(generated) >= 3

    def test_name_is_deterministic_for_seed(self, engine: Engine):
        """Two calls with the same seed return the same name."""
        first = engine.name(seed=42)
        second = engine.name(seed=42)
        assert first == second

    def test_name_is_capitalized(self, engine: Engine):
        """The first character of a generated name is uppercase."""
        initial = engine.name(seed=42)[0]
        assert initial.isupper()

    def test_rewrite_rule_applied(self, engine: Engine):
        """No generated name contains a lowercase "k"."""
        # The fixture configures a "k -> c" rule, so "k" should be rewritten away.
        batch = engine.names(count=20, seed=42)
        assert all("k" not in candidate for candidate in batch)

    def test_names_returns_distinct(self, engine: Engine):
        """A batch of ten names contains no duplicates."""
        batch = engine.names(count=10, seed=42)
        assert len(batch) == len(set(batch))

    def test_names_respects_count(self, engine: Engine):
        """Requesting five names returns exactly five."""
        batch = engine.names(count=5, seed=42)
        assert len(batch) == 5

    def test_names_within_length_envelope(self, engine: Engine):
        """Every name's length lies between ``min_length`` and ``max_length``."""
        for candidate in engine.names(count=20, seed=42):
            assert engine.min_length <= len(candidate) <= engine.max_length
305 assert engine.min_length <= len(name) <= engine.max_length