"""Tests for the storied.names phonotactic engine layer."""

import random

import pytest

from storied.names.engine.clusters import has_forbidden_cluster, normalize_word
from storied.names.engine.generator import Engine
from storied.names.engine.inventory import PhonemeInventory, zipfian_weights
from storied.names.engine.rewrite import (
    apply_rules,
    capitalize_name,
    parse_rule,
)
from storied.names.engine.sonority import passes_sonority, sonority_class
from storied.names.engine.syllable import (
    parse_template,
    sample_syllable,
    sample_word,
)


class TestZipfianWeights:
    """Checks the list returned by ``zipfian_weights`` for several sizes."""

    def test_empty(self):
        """A size of zero yields an empty list."""
        weights = zipfian_weights(0)
        assert weights == []

    def test_single(self):
        """A size of one yields the single weight 1.0."""
        assert zipfian_weights(1) == [1.0]

    def test_normalized(self):
        """Five weights sum to 1.0 within float tolerance."""
        total = sum(zipfian_weights(5))
        assert total == pytest.approx(1.0)

    def test_decreasing(self):
        """Each of six weights is strictly greater than its successor."""
        weights = zipfian_weights(6)
        # Pair every weight with the one that follows it.
        assert all(
            earlier > later for earlier, later in zip(weights, weights[1:])
        )


class TestPhonemeInventory:
    """Tests for ``PhonemeInventory`` class lookup, sampling and trimming."""

    @pytest.fixture
    def inventory(self) -> PhonemeInventory:
        """Inventory with explicit liquid and nasal lists."""
        return PhonemeInventory(
            name="test",
            consonants=["k", "t", "n", "m", "s", "r", "l"],
            vowels=["a", "e", "i", "o"],
            liquids=["r", "l"],
            nasals=["n", "m"],
        )

    @pytest.fixture
    def rng(self) -> random.Random:
        """Fresh generator with a fixed seed so every test is repeatable."""
        return random.Random(42)

    def test_classes_exposes_C_and_V(self, inventory: PhonemeInventory):
        """``classes()`` maps "C" and "V" to the configured phoneme lists."""
        classes = inventory.classes()
        assert classes["C"] == ["k", "t", "n", "m", "s", "r", "l"]
        assert classes["V"] == ["a", "e", "i", "o"]

    def test_classes_uses_explicit_liquids(self, inventory: PhonemeInventory):
        """Explicitly supplied liquids are returned under the "L" class."""
        assert inventory.classes()["L"] == ["r", "l"]

    def test_classes_falls_back_to_heuristic_liquids(self):
        """Without explicit liquids, "L" still contains l and r."""
        inv = PhonemeInventory(
            name="x",
            consonants=["t", "k", "l", "r", "m"],
            vowels=["a", "i"],
        )
        # No explicit liquids — heuristic should pick l and r
        liquids = inv.classes()["L"]
        assert "l" in liquids
        assert "r" in liquids

    def test_sample_returns_phoneme(
        self,
        inventory: PhonemeInventory,
        rng: random.Random,
    ):
        """Sampling the "V" class returns one of the inventory's vowels."""
        result = inventory.sample("V", rng)
        assert result in inventory.vowels

    def test_sample_unknown_class_raises(
        self,
        inventory: PhonemeInventory,
        rng: random.Random,
    ):
        """Sampling an unknown class raises ValueError naming that class."""
        with pytest.raises(ValueError, match="Z"):
            inventory.sample("Z", rng)

    def test_trim_reduces_consonants(
        self,
        inventory: PhonemeInventory,
        rng: random.Random,
    ):
        """Trimming the seven-consonant inventory leaves fewer consonants."""
        trimmed = inventory.trim(rng, drop_fraction=0.3)
        # 0.3 * 7 ≈ 2 dropped from 7
        assert len(trimmed.consonants) < len(inventory.consonants)

    def test_trim_keeps_small_inventories(self, rng: random.Random):
        """A two-consonant inventory keeps both consonants when trimmed."""
        small = PhonemeInventory(
            name="tiny",
            consonants=["k", "n"],
            vowels=["a", "i"],
        )
        trimmed = small.trim(rng, drop_fraction=0.5)
        assert trimmed.consonants == ["k", "n"]


class TestSyllableParser:
    """Tests for ``parse_template``: parsed slots and rejected templates."""

    def test_simple_template(self):
        """``CV`` parses to two slots: a non-optional C, then V."""
        slots = parse_template("CV")
        assert len(slots) == 2
        onset, nucleus = slots[0], slots[1]
        assert onset.klass == "C"
        assert not onset.optional
        assert nucleus.klass == "V"

    def test_optional_slot(self):
        """Parentheses mark a slot optional; the bare V stays required."""
        slots = parse_template("(C)V")
        leading = slots[0]
        following = slots[1]
        assert leading.optional
        assert following.klass == "V"
        assert not following.optional

    def test_complex_template(self):
        """A four-slot template keeps class order and optional flags."""
        slots = parse_template("(C)(L)V(C)")
        assert len(slots) == 4
        klasses = [slot.klass for slot in slots]
        optional_flags = [slot.optional for slot in slots]
        assert klasses == ["C", "L", "V", "C"]
        assert optional_flags == [True, True, False, True]

    def test_unclosed_paren_raises(self):
        """A template with a missing closing paren raises ValueError."""
        template = "(CV"
        with pytest.raises(ValueError, match="Unclosed paren"):
            parse_template(template)

    def test_unknown_class_raises(self):
        """A template containing an unknown letter raises ValueError."""
        template = "CZV"
        with pytest.raises(ValueError, match="Unknown class letter"):
            parse_template(template)

    def test_multi_letter_optional_raises(self):
        """An optional group with more than one letter raises ValueError."""
        template = "(CV)"
        with pytest.raises(ValueError, match="single class letters"):
            parse_template(template)


class TestSampleSyllable:
    """Tests for ``sample_syllable`` against a tiny two-by-two inventory."""

    @pytest.fixture
    def inventory(self) -> PhonemeInventory:
        """Inventory with two consonants and two vowels."""
        return PhonemeInventory(
            name="x",
            consonants=["k", "t"],
            vowels=["a", "i"],
        )

    @pytest.fixture
    def rng(self) -> random.Random:
        """Fresh generator with a fixed seed so every test is repeatable."""
        return random.Random(42)

    def test_returns_phoneme_list(
        self,
        inventory: PhonemeInventory,
        rng: random.Random,
    ):
        """A ``CV`` template yields a list holding two phonemes."""
        slots = parse_template("CV")
        result = sample_syllable(slots, inventory, rng)
        assert isinstance(result, list)
        assert len(result) == 2

    def test_optional_slot_sometimes_skipped(
        self,
        inventory: PhonemeInventory,
        rng: random.Random,
    ):
        """Over 50 draws of ``(C)V`` the optional slot is both used and skipped."""
        slots = parse_template("(C)V")
        lengths = {
            len(sample_syllable(slots, inventory, rng)) for _ in range(50)
        }
        # Both outcomes must occur: length 1 (optional C skipped) and
        # length 2 (optional C filled).  The previous `1 in ... or 2 in ...`
        # check passed even if the optional slot was never skipped.
        assert lengths == {1, 2}


class TestSampleWord:
    """Tests for ``sample_word``."""

    def test_produces_phonemes(self):
        """Two ``CV`` syllables produce a word of four phonemes."""
        inv = PhonemeInventory(
            name="x",
            consonants=["k", "t", "n"],
            vowels=["a", "i", "o"],
        )
        # sample_word receives a list of parsed templates, one entry here.
        templates = [parse_template("CV")]
        word = sample_word(
            templates,
            inv,
            random.Random(42),
            syllable_count=(2, 2),
        )
        assert len(word) == 4  # 2 syllables × 2 phonemes each


class TestSonority:
    """Tests for ``sonority_class`` values and the ``passes_sonority`` check."""

    def test_vowel_class(self):
        """The vowels a and e both map to sonority 8."""
        for vowel in ("a", "e"):
            assert sonority_class(vowel) == 8

    def test_stop_class(self):
        """The stop p maps to sonority 1."""
        stop_rank = sonority_class("p")
        assert stop_rank == 1

    def test_unknown_defaults_to_4(self):
        """An unrecognised phoneme string maps to sonority 4."""
        unknown_rank = sonority_class("zzqq")
        assert unknown_rank == 4

    def test_passes_short_word(self):
        """A two-phoneme word passes the check."""
        word = ["k", "a"]
        assert passes_sonority(word)

    def test_passes_canonical_cv(self):
        """A plain consonant-vowel-consonant word passes the check."""
        word = ["k", "a", "t"]
        assert passes_sonority(word)

    def test_rejects_falling_onset(self):
        """A word opening with /rk/ before the vowel is rejected."""
        # /rk/ at word start puts a liquid (sonority 6) before a stop (1):
        # a falling onset ahead of the vowel, which the soft check catches.
        word = ["r", "k", "a"]
        assert not passes_sonority(word)


class TestClusters:
    """Tests for ``normalize_word`` and ``has_forbidden_cluster``."""

    def test_normalize_joins(self):
        """Phonemes, including a multi-letter one, join into one string."""
        phonemes = ["k", "a", "th"]
        assert normalize_word(phonemes) == "kath"

    def test_no_forbidden_passes(self):
        """An empty forbidden list never flags a word."""
        flagged = has_forbidden_cluster("kath", [])
        assert not flagged

    def test_substring_match(self):
        """An unanchored cluster is found as a substring of the word."""
        flagged = has_forbidden_cluster("kathx", ["thx"])
        assert flagged

    def test_word_boundary_anchor_start(self):
        """A leading ``#`` restricts the match to the start of the word."""
        # #sr means /sr/ only at word start
        anchored = "#sr"
        assert has_forbidden_cluster("srak", [anchored])
        assert not has_forbidden_cluster("aksra", [anchored])

    def test_word_boundary_anchor_end(self):
        """A trailing ``#`` restricts the match to the end of the word."""
        anchored = "tl#"
        assert has_forbidden_cluster("katl", [anchored])
        assert not has_forbidden_cluster("katla", [anchored])


class TestRewrite:
    """Tests for ``parse_rule``, ``apply_rules`` and ``capitalize_name``."""

    def test_parse_simple_rule(self):
        """``k -> g`` parses to a pattern for "k" and the replacement "g"."""
        pattern, replacement = parse_rule("k -> g")
        assert pattern.pattern == "k"
        assert replacement == "g"

    def test_apply_simple_substitution(self):
        """Every occurrence of the pattern is replaced."""
        assert apply_rules("kakak", ["k -> g"]) == "gagag"

    def test_apply_anchored_rule(self):
        """A ``$``-anchored rule with an empty replacement drops a final h."""
        rules = ["h$ -> "]
        assert apply_rules("hahah", rules) == "haha"

    def test_apply_multiple_rules_in_order(self):
        """Two rules applied to one word both take effect."""
        rules = ["th -> dh", "k -> g"]
        assert apply_rules("kath", rules) == "gadh"

    def test_invalid_rule_skipped(self):
        """A rule without an arrow is ignored; the valid rule still applies."""
        rules = ["no arrow", "k -> g"]
        assert apply_rules("kath", rules) == "gath"

    def test_capitalize_name(self):
        """The first letter of a lowercase name is upper-cased."""
        capitalized = capitalize_name("aldric")
        assert capitalized == "Aldric"

    def test_capitalize_empty(self):
        """An empty string is returned unchanged."""
        capitalized = capitalize_name("")
        assert capitalized == ""


class TestEngine:
    """End-to-end tests for ``Engine.name`` and ``Engine.names``."""

    @pytest.fixture
    def engine(self) -> Engine:
        """Engine with one forbidden cluster, one rewrite rule, length 3-10."""
        inventory = PhonemeInventory(
            name="test",
            consonants=["k", "t", "n", "m", "s", "l", "r"],
            vowels=["a", "e", "i", "o"],
            liquids=["l", "r"],
            nasals=["n", "m"],
        )
        return Engine(
            inventory=inventory,
            templates=["(C)V(C)", "CV"],
            forbidden_clusters=["#sr"],
            rewrite_rules=["k -> c"],
            min_length=3,
            max_length=10,
        )

    def test_name_returns_string(self, engine: Engine):
        """``name`` returns a str of at least three characters."""
        generated = engine.name(seed=42)
        assert isinstance(generated, str)
        assert len(generated) >= 3

    def test_name_is_deterministic_for_seed(self, engine: Engine):
        """Two calls with the same seed return equal names."""
        first = engine.name(seed=42)
        second = engine.name(seed=42)
        assert first == second

    def test_name_is_capitalized(self, engine: Engine):
        """The generated name starts with an upper-case character."""
        initial = engine.name(seed=42)[0]
        assert initial.isupper()

    def test_rewrite_rule_applied(self, engine: Engine):
        """No generated name contains a lowercase k."""
        # The fixture's "k -> c" rule should rewrite every lowercase k.
        generated = engine.names(count=20, seed=42)
        assert all("k" not in name for name in generated)

    def test_names_returns_distinct(self, engine: Engine):
        """A batch of ten names contains no duplicates."""
        generated = engine.names(count=10, seed=42)
        unique = set(generated)
        assert len(unique) == len(generated)

    def test_names_respects_count(self, engine: Engine):
        """``names`` returns exactly the requested number of names."""
        generated = engine.names(count=5, seed=42)
        assert len(generated) == 5

    def test_names_within_length_envelope(self, engine: Engine):
        """Every name's length lies between min_length and max_length."""
        generated = engine.names(count=20, seed=42)
        assert all(
            engine.min_length <= len(name) <= engine.max_length
            for name in generated
        )