"""Tests for the storied.names phonotactic engine layer.""" import pytest from storied.names.engine.clusters import has_forbidden_cluster, normalize_word from storied.names.engine.generator import Engine from storied.names.engine.inventory import PhonemeInventory, zipfian_weights from storied.names.engine.rewrite import ( apply_rules, capitalize_name, parse_rule, ) from storied.names.engine.sonority import passes_sonority, sonority_class from storied.names.engine.syllable import ( parse_template, sample_syllable, sample_word, ) class TestZipfianWeights: def test_empty(self): assert zipfian_weights(0) == [] def test_single(self): weights = zipfian_weights(1) assert weights == [1.0] def test_normalized(self): weights = zipfian_weights(5) assert sum(weights) == pytest.approx(1.0) def test_decreasing(self): weights = zipfian_weights(6) for i in range(len(weights) - 1): assert weights[i] > weights[i + 1] class TestPhonemeInventory: @pytest.fixture def inventory(self) -> PhonemeInventory: return PhonemeInventory( name="test", consonants=["k", "t", "n", "m", "s", "r", "l"], vowels=["a", "e", "i", "o"], liquids=["r", "l"], nasals=["n", "m"], ) def test_classes_exposes_C_and_V(self, inventory: PhonemeInventory): classes = inventory.classes() assert classes["C"] == ["k", "t", "n", "m", "s", "r", "l"] assert classes["V"] == ["a", "e", "i", "o"] def test_classes_uses_explicit_liquids(self, inventory: PhonemeInventory): assert inventory.classes()["L"] == ["r", "l"] def test_classes_falls_back_to_heuristic_liquids(self): inv = PhonemeInventory( name="x", consonants=["t", "k", "l", "r", "m"], vowels=["a", "i"], ) # No explicit liquids — heuristic should pick l and r liquids = inv.classes()["L"] assert "l" in liquids assert "r" in liquids def test_sample_returns_phoneme(self, inventory: PhonemeInventory): import random rng = random.Random(42) result = inventory.sample("V", rng) assert result in inventory.vowels def test_sample_unknown_class_raises(self, inventory: PhonemeInventory): import random rng = random.Random(42) with pytest.raises(ValueError, match="Z"): inventory.sample("Z", rng) def test_trim_reduces_consonants(self, inventory: PhonemeInventory): import random rng = random.Random(42) trimmed = inventory.trim(rng, drop_fraction=0.3) # 0.3 * 7 ≈ 2 dropped from 7 assert len(trimmed.consonants) < len(inventory.consonants) def test_trim_keeps_small_inventories(self): small = PhonemeInventory( name="tiny", consonants=["k", "n"], vowels=["a", "i"], ) import random rng = random.Random(42) trimmed = small.trim(rng, drop_fraction=0.5) assert trimmed.consonants == ["k", "n"] class TestSyllableParser: def test_simple_template(self): slots = parse_template("CV") assert len(slots) == 2 assert slots[0].klass == "C" assert not slots[0].optional assert slots[1].klass == "V" def test_optional_slot(self): slots = parse_template("(C)V") assert slots[0].optional assert slots[1].klass == "V" assert not slots[1].optional def test_complex_template(self): slots = parse_template("(C)(L)V(C)") assert len(slots) == 4 assert [s.klass for s in slots] == ["C", "L", "V", "C"] assert [s.optional for s in slots] == [True, True, False, True] def test_unclosed_paren_raises(self): with pytest.raises(ValueError, match="Unclosed paren"): parse_template("(CV") def test_unknown_class_raises(self): with pytest.raises(ValueError, match="Unknown class letter"): parse_template("CZV") def test_multi_letter_optional_raises(self): with pytest.raises(ValueError, match="single class letters"): parse_template("(CV)") class TestSampleSyllable: @pytest.fixture def inventory(self) -> PhonemeInventory: return PhonemeInventory( name="x", consonants=["k", "t"], vowels=["a", "i"], ) def test_returns_phoneme_list(self, inventory: PhonemeInventory): import random rng = random.Random(42) slots = parse_template("CV") result = sample_syllable(slots, inventory, rng) assert isinstance(result, list) assert len(result) == 2 def test_optional_slot_sometimes_skipped( self, inventory: PhonemeInventory, ): import random rng = random.Random(42) slots = parse_template("(C)V") results = [sample_syllable(slots, inventory, rng) for _ in range(50)] # Some should be 1-element (skipped optional), some 2 lengths = {len(r) for r in results} assert 1 in lengths or 2 in lengths class TestSampleWord: def test_produces_phonemes(self): import random rng = random.Random(42) inv = PhonemeInventory( name="x", consonants=["k", "t", "n"], vowels=["a", "i", "o"], ) slots = [parse_template("CV")] word = sample_word(slots, inv, rng, syllable_count=(2, 2)) assert len(word) == 4 # 2 syllables × 2 phonemes each class TestSonority: def test_vowel_class(self): assert sonority_class("a") == 8 assert sonority_class("e") == 8 def test_stop_class(self): assert sonority_class("p") == 1 def test_unknown_defaults_to_4(self): assert sonority_class("zzqq") == 4 def test_passes_short_word(self): assert passes_sonority(["k", "a"]) def test_passes_canonical_cv(self): assert passes_sonority(["k", "a", "t"]) def test_rejects_falling_onset(self): # /rk/ at word start: liquid (sonority 6) before stop (1) # That's a falling onset before the vowel, which the soft check catches assert not passes_sonority(["r", "k", "a"]) class TestClusters: def test_normalize_joins(self): assert normalize_word(["k", "a", "th"]) == "kath" def test_no_forbidden_passes(self): assert not has_forbidden_cluster("kath", []) def test_substring_match(self): assert has_forbidden_cluster("kathx", ["thx"]) def test_word_boundary_anchor_start(self): # #sr means /sr/ only at word start assert has_forbidden_cluster("srak", ["#sr"]) assert not has_forbidden_cluster("aksra", ["#sr"]) def test_word_boundary_anchor_end(self): assert has_forbidden_cluster("katl", ["tl#"]) assert not has_forbidden_cluster("katla", ["tl#"]) class TestRewrite: def test_parse_simple_rule(self): pat, repl = parse_rule("k -> g") assert pat.pattern == "k" assert repl == "g" def test_apply_simple_substitution(self): result = apply_rules("kakak", ["k -> g"]) assert result == "gagag" def test_apply_anchored_rule(self): result = apply_rules("hahah", ["h$ -> "]) assert result == "haha" def test_apply_multiple_rules_in_order(self): result = apply_rules("kath", ["th -> dh", "k -> g"]) assert result == "gadh" def test_invalid_rule_skipped(self): result = apply_rules("kath", ["no arrow", "k -> g"]) assert result == "gath" def test_capitalize_name(self): assert capitalize_name("aldric") == "Aldric" def test_capitalize_empty(self): assert capitalize_name("") == "" class TestEngine: @pytest.fixture def engine(self) -> Engine: return Engine( inventory=PhonemeInventory( name="test", consonants=["k", "t", "n", "m", "s", "l", "r"], vowels=["a", "e", "i", "o"], liquids=["l", "r"], nasals=["n", "m"], ), templates=["(C)V(C)", "CV"], forbidden_clusters=["#sr"], rewrite_rules=["k -> c"], min_length=3, max_length=10, ) def test_name_returns_string(self, engine: Engine): name = engine.name(seed=42) assert isinstance(name, str) assert len(name) >= 3 def test_name_is_deterministic_for_seed(self, engine: Engine): a = engine.name(seed=42) b = engine.name(seed=42) assert a == b def test_name_is_capitalized(self, engine: Engine): name = engine.name(seed=42) assert name[0].isupper() def test_rewrite_rule_applied(self, engine: Engine): # Engine has "k -> c" rule, so no lowercase k should appear names = engine.names(count=20, seed=42) for name in names: assert "k" not in name def test_names_returns_distinct(self, engine: Engine): names = engine.names(count=10, seed=42) assert len(set(names)) == len(names) def test_names_respects_count(self, engine: Engine): names = engine.names(count=5, seed=42) assert len(names) == 5 def test_names_within_length_envelope(self, engine: Engine): names = engine.names(count=20, seed=42) for name in names: assert engine.min_length <= len(name) <= engine.max_length