use anyhow::{Context, Result};
use chrono::prelude::*;
use rusqlite::Connection;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Mutex;

#[derive(Debug, Clone, Default, Serialize, Deserialize, clap::ValueEnum)]
#[serde(rename_all = "snake_case")]
pub enum SortField {
    #[default]
    Duration,
    Count,
    Name,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Stats {
    pub build_count: i64,
    pub build_total_ms: i64,
    pub subst_count: i64,
    pub subst_total_ms: i64,
    pub download_bytes: i64,
    pub download_ms: i64,
    pub slowest_builds: Vec<SlowBuild>,
    pub cache_latency: Vec<CacheStat>,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct SlowBuild {
    pub duration_ms: i64,
    pub count: Option<i64>,
    pub drv_path: Option<String>,
    pub text: Option<String>,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct CacheStat {
    pub cache_url: String,
    pub avg_ms: f64,
    pub count: i64,
}

// Snapshot of the current day's in-memory aggregates, passed from the daemon
// to collect_stats so the summary path never needs to scan the events table.
// cache: substituter URL → (total_ms, count)
#[derive(Clone, Default)]
pub struct TodaySummary {
    pub day: i64,
    pub build_count: i64,
    pub build_total_ms: i64,
    pub subst_count: i64,
    pub subst_total_ms: i64,
    pub download_count: i64,
    pub download_bytes: i64,
    pub download_ms: i64,
    pub cache: HashMap<String, (i64, i64)>,
}
// Fast path: query daily_stats for closed days, merge today's in-memory snapshot.
// Returns (build_count, build_ms, subst_count, subst_ms, dl_bytes, dl_ms, cache_latency).
fn summary_from_cache(
    conn: &Connection,
    since: Option<i64>,
    today: &TodaySummary,
) -> Result<(i64, i64, i64, i64, i64, i64, Vec<CacheStat>)> {
    assert!(today.day > 0);
    let since_day: Option<i64> = since.map(|ts| ts / 86400);
    let today_in_range = since_day.map_or(true, |sd| today.day >= sd);

    let (hbc, hbms) = conn.query_row(
        "SELECT COALESCE(SUM(count),0), COALESCE(SUM(total_ms),0) FROM daily_stats
         WHERE event_type = 105 AND (?1 IS NULL OR day >= ?1) AND day < ?2",
        rusqlite::params![since_day, today.day],
        |r| Ok((r.get::<_, i64>(0)?, r.get::<_, i64>(1)?)),
    ).context("Failed to query build summary from daily_stats")?;

    let (hsc, hsms) = conn.query_row(
        "SELECT COALESCE(SUM(count),0), COALESCE(SUM(total_ms),0) FROM daily_stats
         WHERE event_type = 108 AND (?1 IS NULL OR day >= ?1) AND day < ?2",
        rusqlite::params![since_day, today.day],
        |r| Ok((r.get::<_, i64>(0)?, r.get::<_, i64>(1)?)),
    ).context("Failed to query subst summary from daily_stats")?;

    let (hdb, hdms) = conn.query_row(
        "SELECT COALESCE(SUM(total_bytes),0), COALESCE(SUM(total_ms),0) FROM daily_stats
         WHERE event_type = 101 AND (?1 IS NULL OR day >= ?1) AND day < ?2",
        rusqlite::params![since_day, today.day],
        |r| Ok((r.get::<_, i64>(0)?, r.get::<_, i64>(1)?)),
    ).context("Failed to query download summary from daily_stats")?;

    let build_count = hbc + if today_in_range { today.build_count } else { 0 };
    let build_total_ms = hbms + if today_in_range { today.build_total_ms } else { 0 };
    let subst_count = hsc + if today_in_range { today.subst_count } else { 0 };
    let subst_total_ms = hsms + if today_in_range { today.subst_total_ms } else { 0 };
    let download_bytes = hdb + if today_in_range { today.download_bytes } else { 0 };
    let download_ms = hdms + if today_in_range { today.download_ms } else { 0 };

    // Cache latency: closed days from daily_cache_stats, today from memory.
    let mut cache_map: HashMap<String, (i64, i64)> = HashMap::new();
    let mut stmt = conn.prepare(
        "SELECT cache_url, SUM(total_ms), SUM(count) FROM daily_cache_stats
         WHERE (?1 IS NULL OR day >= ?1) AND day < ?2 GROUP BY cache_url",
    ).context("Failed to prepare cache latency query")?;
    for row in stmt.query_map(rusqlite::params![since_day, today.day], |r| {
        Ok((r.get::<_, String>(0)?, r.get::<_, i64>(1)?, r.get::<_, i64>(2)?))
    })?.filter_map(|r| r.ok()) {
        let (url, ms, cnt) = row;
        let e = cache_map.entry(url).or_insert((0, 0));
        e.0 += ms;
        e.1 += cnt;
    }
    if today_in_range {
        for (url, &(ms, cnt)) in &today.cache {
            let e = cache_map.entry(url.clone()).or_insert((0, 0));
            e.0 += ms;
            e.1 += cnt;
        }
    }
    let mut cache_latency: Vec<CacheStat> = cache_map.into_iter()
        .filter(|(_, (_, cnt))| *cnt > 0)
        .map(|(url, (ms, cnt))| CacheStat { cache_url: url, avg_ms: ms as f64 / cnt as f64, count: cnt })
        .collect();
    cache_latency.sort_by(|a, b| b.avg_ms.partial_cmp(&a.avg_ms).unwrap_or(std::cmp::Ordering::Equal));

    Ok((build_count, build_total_ms, subst_count, subst_total_ms, download_bytes, download_ms, cache_latency))
}

// Slow path: full events table scan (used in direct mode or when drv filter is active).
fn summary_from_events(
    conn: &Connection,
    since: Option<i64>,
    drv: Option<&str>,
) -> Result<(i64, i64, i64, i64, i64, i64, Vec<CacheStat>)> {
    let (build_count, build_total_ms) = conn.query_row(
        "SELECT COUNT(*), COALESCE(SUM(duration_ms), 0) FROM events INDEXED BY idx_events_type_start
         WHERE event_type = 105 AND (?1 IS NULL OR start_time >= ?1)
           AND (?2 IS NULL OR drv_path LIKE '%' || ?2 || '%')",
        rusqlite::params![since, drv],
        |r| Ok((r.get::<_, i64>(0)?, r.get::<_, i64>(1)?)),
    ).context("Failed to query build summary")?;

    let (subst_count, subst_total_ms) = conn.query_row(
        "SELECT COUNT(*), COALESCE(SUM(duration_ms), 0) FROM events INDEXED BY idx_events_type_start
         WHERE event_type = 108 AND (?1 IS NULL OR start_time >= ?1)
           AND (?2 IS NULL OR drv_path LIKE '%' || ?2 || '%')",
        rusqlite::params![since, drv],
        |r| Ok((r.get::<_, i64>(0)?, r.get::<_, i64>(1)?)),
    ).context("Failed to query substitution summary")?;

    let (download_bytes, download_ms) = conn.query_row(
        "SELECT COALESCE(SUM(total_bytes), 0), COALESCE(SUM(duration_ms), 0)
         FROM events INDEXED BY idx_events_type_start
         WHERE event_type = 101 AND (?1 IS NULL OR start_time >= ?1)",
        rusqlite::params![since],
        |r| Ok((r.get::<_, i64>(0)?, r.get::<_, i64>(1)?)),
    ).context("Failed to query download summary")?;

    let mut stmt = conn.prepare(
        "SELECT cache_url, AVG(duration_ms), COUNT(*) FROM events INDEXED BY idx_events_type_start
         WHERE event_type = 108 AND cache_url IS NOT NULL AND (?1 IS NULL OR start_time >= ?1)
         GROUP BY cache_url ORDER BY AVG(duration_ms) DESC",
    ).context("Failed to prepare cache latency query")?;
    let cache_latency: Vec<CacheStat> = stmt.query_map(rusqlite::params![since], |r| {
        Ok(CacheStat { cache_url: r.get(0)?, avg_ms: r.get(1)?, count: r.get(2)? })
    })?.filter_map(|r| r.ok()).collect();

    Ok((build_count, build_total_ms, subst_count, subst_total_ms, download_bytes, download_ms, cache_latency))
}
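// A minimal sketch of exercising summary_from_cache against an in-memory
// database. The CREATE TABLE statements are an assumption: they model only the
// columns this module's queries touch, not the daemon's real migrations, and
// the inserted rows are hypothetical.
#[cfg(test)]
mod summary_cache_tests {
    use super::*;

    #[test]
    fn merges_closed_days_with_today() {
        let conn = Connection::open_in_memory().unwrap();
        conn.execute_batch(
            "CREATE TABLE daily_stats (day INTEGER, event_type INTEGER, count INTEGER, total_ms INTEGER, total_bytes INTEGER);
             CREATE TABLE daily_cache_stats (day INTEGER, cache_url TEXT, total_ms INTEGER, count INTEGER);
             INSERT INTO daily_stats VALUES (19999, 105, 2, 4000, 0);
             INSERT INTO daily_stats VALUES (19999, 108, 3, 300, 0);
             INSERT INTO daily_stats VALUES (19999, 101, 1, 1000, 5000);
             INSERT INTO daily_cache_stats VALUES (19999, 'https://cache.example.org', 300, 3);",
        ).unwrap();

        // One build on the (open) current day, plus one more cache hit.
        let mut today = TodaySummary { day: 20000, build_count: 1, build_total_ms: 1000, ..Default::default() };
        today.cache.insert("https://cache.example.org".into(), (100, 1));

        let (bc, bms, sc, sms, dlb, dlms, cache) = summary_from_cache(&conn, None, &today).unwrap();
        assert_eq!((bc, bms), (3, 5000));   // 2 closed-day builds + 1 today
        assert_eq!((sc, sms), (3, 300));
        assert_eq!((dlb, dlms), (5000, 1000));
        assert_eq!(cache.len(), 1);
        assert_eq!(cache[0].count, 4);      // 3 closed-day queries + 1 today
        assert!((cache[0].avg_ms - 100.0).abs() < 1e-9);
    }
}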
pub fn collect_stats(
    db: &Mutex<Connection>,
    since: Option<i64>,
    drv: Option<&str>,
    sort: SortField,
    limit: u32,
    group: bool,
    today: Option<TodaySummary>,
) -> Result<Stats> {
    assert!(limit > 0, "limit must be > 0");
    let conn = db.lock().unwrap();

    // Fast path: daily_stats + today memory — O(days) instead of O(events).
    // Falls back to events scan when a drv filter is active (drv_path not in daily_stats)
    // or when running in direct mode (no daemon, today is None).
    let (build_count, build_total_ms, subst_count, subst_total_ms, download_bytes, download_ms, cache_latency) =
        if let (Some(t), None) = (today.as_ref(), drv) {
            summary_from_cache(&conn, since, t)?
        } else {
            summary_from_events(&conn, since, drv)?
        };
    assert!(build_count >= 0);
    assert!(subst_count >= 0);
    assert!(download_bytes >= 0);

    let slowest_builds: Vec<SlowBuild> = if group {
        let sort_col = match sort {
            SortField::Duration => "avg_ms",
            SortField::Count => "cnt",
            SortField::Name => "drv_path",
        };
        let sql = format!(
            "SELECT drv_path, CAST(ROUND(AVG(duration_ms)) AS INTEGER) as avg_ms, COUNT(*) as cnt
             FROM events INDEXED BY idx_events_type_start
             WHERE event_type = 105 AND (?1 IS NULL OR start_time >= ?1)
               AND (?2 IS NULL OR drv_path LIKE '%' || ?2 || '%')
             GROUP BY drv_path ORDER BY {sort_col} DESC LIMIT ?3"
        );
        let mut stmt = conn.prepare(&sql).context("Failed to prepare grouped builds query")?;
        stmt.query_map(rusqlite::params![since, drv, limit], |r| {
            Ok(SlowBuild { drv_path: r.get(0)?, duration_ms: r.get(1)?, count: Some(r.get(2)?), text: None })
        })?.filter_map(|r| r.ok()).collect()
    } else {
        let sort_col = match sort {
            SortField::Duration | SortField::Count => "duration_ms",
            SortField::Name => "drv_path",
        };
        let sql = format!(
            "SELECT duration_ms, drv_path, text FROM events INDEXED BY idx_events_type_start
             WHERE event_type = 105 AND (?1 IS NULL OR start_time >= ?1)
               AND (?2 IS NULL OR drv_path LIKE '%' || ?2 || '%')
             ORDER BY {sort_col} DESC LIMIT ?3"
        );
        let mut stmt = conn.prepare(&sql).context("Failed to prepare slowest builds query")?;
        stmt.query_map(rusqlite::params![since, drv, limit], |r| {
            Ok(SlowBuild { duration_ms: r.get(0)?, count: None, drv_path: r.get(1)?, text: r.get(2)? })
        })?.filter_map(|r| r.ok()).collect()
    };

    Ok(Stats { build_count, build_total_ms, subst_count, subst_total_ms, download_bytes, download_ms, slowest_builds, cache_latency })
}
// Mann-Whitney U is non-parametric and makes no distributional assumptions,
// which is appropriate for build times that are right-skewed.
pub struct MannWhitneyResult {
    pub p_value: f64,
}

pub fn mann_whitney_u(a: &[i64], b: &[i64]) -> Option<MannWhitneyResult> {
    if a.is_empty() || b.is_empty() {
        return None;
    }
    let n1 = a.len();
    let n2 = b.len();
    let n1f = n1 as f64;
    let n2f = n2 as f64;

    let mut combined: Vec<(i64, usize)> = a.iter().map(|&v| (v, 0))
        .chain(b.iter().map(|&v| (v, 1)))
        .collect();
    combined.sort_unstable_by_key(|&(v, _)| v);

    let n = combined.len();
    let mut rank_sum_a = 0.0f64;
    let mut tie_correction = 0.0f64;
    let mut i = 0;
    while i < n {
        let mut j = i;
        while j < n && combined[j].0 == combined[i].0 {
            j += 1;
        }
        let avg_rank = (i as f64 + 1.0 + j as f64) / 2.0;
        for k in i..j {
            if combined[k].1 == 0 {
                rank_sum_a += avg_rank;
            }
        }
        let t = (j - i) as f64;
        if t > 1.0 {
            tie_correction += t * t * t - t;
        }
        i = j;
    }
    assert!(rank_sum_a >= 0.0);

    let u_a = rank_sum_a - n1f * (n1f + 1.0) / 2.0;
    let u_b = n1f * n2f - u_a;
    assert!(u_a >= 0.0);
    assert!(u_b >= 0.0);
    assert!((u_a + u_b - n1f * n2f).abs() < 1e-6, "U_A + U_B must equal n1*n2");

    let cliffs_delta = (u_a - u_b) / (n1f * n2f);
    assert!(cliffs_delta >= -1.0 - 1e-9);
    assert!(cliffs_delta <= 1.0 + 1e-9);
    let _ = cliffs_delta;

    let nf = n as f64;
    let variance = (n1f * n2f / 12.0) * ((nf + 1.0) - tie_correction / (nf * (nf - 1.0)));
    let p_value = if variance <= 0.0 {
        1.0
    } else {
        let u_min = u_a.min(u_b);
        let mean_u = n1f * n2f / 2.0;
        // Continuity correction. With heavy ties U_min can equal mean_U, which
        // would make z slightly positive; clamp at 0 so the two-tailed p stays ≤ 1.
        let z = ((u_min - mean_u + 0.5) / variance.sqrt()).min(0.0);
        (2.0 * normal_cdf(z)).min(1.0)
    };
    assert!(p_value >= 0.0);
    assert!(p_value <= 1.0);
    Some(MannWhitneyResult { p_value })
}

fn normal_cdf(z: f64) -> f64 {
    0.5 * (1.0 + erf_approx(z / std::f64::consts::SQRT_2))
}

fn erf_approx(x: f64) -> f64 {
    let t = 1.0 / (1.0 + 0.3275911 * x.abs());
    let poly = t * (0.254829592
        + t * (-0.284496736
            + t * (1.421413741 + t * (-1.453152027 + t * 1.061405429))));
    let result = 1.0 - poly * (-x * x).exp();
    if x >= 0.0 { result } else { -result }
}

pub fn median_sorted(sorted: &[i64]) -> f64 {
    assert!(!sorted.is_empty());
    let n = sorted.len();
    if n % 2 == 0 {
        (sorted[n / 2 - 1] + sorted[n / 2]) as f64 / 2.0
    } else {
        sorted[n / 2] as f64
    }
}

pub fn fmt_ms(ms: i64) -> String {
    if ms < 1000 {
        format!("{}ms", ms)
    } else if ms < 60_000 {
        format!("{:.1}s", ms as f64 / 1000.0)
    } else {
        format!("{}m{:.1}s", ms / 60_000, (ms % 60_000) as f64 / 1000.0)
    }
}

fn drv_name(path: &str) -> &str {
    path.strip_prefix("/nix/store/").unwrap_or(path)
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, clap::ValueEnum)]
#[serde(rename_all = "snake_case")]
pub enum BucketSize {
    Hour,
    Day,
    Week,
    Month,
}

impl BucketSize {
    // Format string must match bucket_label() output exactly so that display
    // functions and any downstream consumers are consistent.
    pub fn strftime_fmt(&self) -> &'static str {
        match self {
            BucketSize::Hour => "%Y-%m-%dT%H",
            BucketSize::Day => "%Y-%m-%d",
            BucketSize::Week => "%Y-W%W",
            BucketSize::Month => "%Y-%m",
        }
    }

    fn col_width(&self) -> usize {
        match self {
            BucketSize::Hour => 13,
            BucketSize::Day => 10,
            BucketSize::Week => 8,
            BucketSize::Month => 7,
        }
    }
}
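// Hedged spot-checks for the statistical helpers. Exact p-values depend on the
// erf approximation above, so the assertions only pin down coarse properties:
// identical samples should not look significant, clearly separated ones should.
#[cfg(test)]
mod stats_math_tests {
    use super::*;

    #[test]
    fn identical_samples_are_not_significant() {
        let r = mann_whitney_u(&[1, 2, 3], &[1, 2, 3]).unwrap();
        assert!(r.p_value > 0.9);
    }

    #[test]
    fn separated_samples_are_significant() {
        let r = mann_whitney_u(&[1, 2, 3, 4, 5], &[10, 20, 30, 40, 50]).unwrap();
        assert!(r.p_value < 0.05);
    }

    #[test]
    fn empty_input_yields_none() {
        assert!(mann_whitney_u(&[], &[1]).is_none());
    }

    #[test]
    fn median_and_duration_formatting() {
        assert_eq!(median_sorted(&[1, 2, 3, 4]), 2.5);
        assert_eq!(median_sorted(&[1, 2, 3]), 2.0);
        assert_eq!(fmt_ms(500), "500ms");
        assert_eq!(fmt_ms(1500), "1.5s");
        assert_eq!(fmt_ms(61_500), "1m1.5s");
    }
}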
// Compute the display label for the bucket containing ts.
// Uses the same format strings as strftime_fmt() so labels are consistent
// with any legacy data or external tooling that used the SQL strftime path.
fn bucket_label(ts: i64, size: &BucketSize) -> String {
    let dt = DateTime::from_timestamp(ts, 0)
        .unwrap_or_default()
        .with_timezone(&Utc);
    match size {
        BucketSize::Hour => dt.format("%Y-%m-%dT%H").to_string(),
        BucketSize::Day => dt.format("%Y-%m-%d").to_string(),
        BucketSize::Week => dt.format("%Y-W%W").to_string(),
        BucketSize::Month => dt.format("%Y-%m").to_string(),
    }
}

// Compute the start timestamp of the bucket immediately after the bucket
// containing ts. Used for integer-comparison bucket detection in the hot loop —
// avoids a string allocation + comparison per row.
fn bucket_end(ts: i64, size: &BucketSize) -> i64 {
    let dt = DateTime::from_timestamp(ts, 0)
        .unwrap_or_default()
        .with_timezone(&Utc);
    match size {
        BucketSize::Hour => {
            Utc.with_ymd_and_hms(dt.year(), dt.month(), dt.day(), dt.hour(), 0, 0)
                .unwrap()
                .checked_add_signed(chrono::Duration::hours(1))
                .unwrap()
                .timestamp()
        }
        BucketSize::Day => {
            Utc.with_ymd_and_hms(dt.year(), dt.month(), dt.day(), 0, 0, 0)
                .unwrap()
                .checked_add_signed(chrono::Duration::days(1))
                .unwrap()
                .timestamp()
        }
        BucketSize::Week => {
            // Monday-based weeks, matching strftime '%W'.
            let days_since_monday = dt.weekday().num_days_from_monday() as i64;
            let monday = dt.date_naive() - chrono::Duration::days(days_since_monday);
            Utc.from_utc_datetime(&monday.and_hms_opt(0, 0, 0).unwrap())
                .checked_add_signed(chrono::Duration::weeks(1))
                .unwrap()
                .timestamp()
        }
        BucketSize::Month => {
            let (year, month) = if dt.month() == 12 {
                (dt.year() + 1, 1u32)
            } else {
                (dt.year(), dt.month() + 1)
            };
            Utc.with_ymd_and_hms(year, month, 1, 0, 0, 0).unwrap().timestamp()
        }
    }
}

#[derive(Debug, Serialize, Deserialize)]
pub struct TrendBucket {
    pub bucket: String,
    pub build_count: i64,
    pub build_total_ms: i64,
    pub subst_count: i64,
    pub subst_total_ms: i64,
    pub download_bytes: i64,
    // Only populated when full duration data is requested (--output test).
    #[serde(default)]
    pub build_durations: Vec<i64>,
    #[serde(default)]
    pub subst_durations: Vec<i64>,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Trend {
    pub buckets: Vec<TrendBucket>,
    pub bucket_size: BucketSize,
    pub drv_filter: Option<String>,
}
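// Spot-checks for the bucket helpers: bucket_end should land exactly on the
// next UTC hour/day/month boundary and bucket_label should match the formats
// documented on BucketSize::strftime_fmt(). Timestamps are arbitrary examples.
#[cfg(test)]
mod bucket_tests {
    use super::*;

    #[test]
    fn hour_and_day_boundaries() {
        let ts = Utc.with_ymd_and_hms(2024, 3, 10, 13, 45, 7).unwrap().timestamp();
        assert_eq!(bucket_end(ts, &BucketSize::Hour),
                   Utc.with_ymd_and_hms(2024, 3, 10, 14, 0, 0).unwrap().timestamp());
        assert_eq!(bucket_end(ts, &BucketSize::Day),
                   Utc.with_ymd_and_hms(2024, 3, 11, 0, 0, 0).unwrap().timestamp());
        assert_eq!(bucket_label(ts, &BucketSize::Day), "2024-03-10");
    }

    #[test]
    fn month_rolls_over_december() {
        let ts = Utc.with_ymd_and_hms(2024, 12, 31, 23, 0, 0).unwrap().timestamp();
        assert_eq!(bucket_end(ts, &BucketSize::Month),
                   Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap().timestamp());
        assert_eq!(bucket_label(ts, &BucketSize::Month), "2024-12");
    }
}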
// Events-table scan. Returns buckets with individual duration samples populated.
// Required when individual samples are needed (--output test / Mann-Whitney) or
// when a drv filter or hour granularity rules out the daily_stats path.
//
// Raw start_time integers from SQLite — bucket boundaries detected with a single
// integer comparison per row (ts >= next_bucket) so string allocs happen only
// once per bucket, not once per row.
fn trend_from_events(
    conn: &Connection,
    since: Option<i64>,
    bucket: &BucketSize,
    drv: Option<&str>,
) -> Result<Vec<TrendBucket>> {
    // FileTransfer (101) has NULL drv_path and is intentionally excluded by the drv filter.
    // INDEXED BY forces the covering start_time-first index so all four projected columns
    // (start_time, event_type, duration_ms, total_bytes) are served without table lookups.
    let mut stmt = conn.prepare(
        "SELECT start_time, event_type, duration_ms, total_bytes
         FROM events INDEXED BY idx_events_start_cover
         WHERE event_type IN (101, 105, 108) AND (?1 IS NULL OR start_time >= ?1)
           AND (?2 IS NULL OR drv_path LIKE '%' || ?2 || '%')
         ORDER BY start_time ASC",
    ).context("Failed to prepare trend query")?;

    let mut buckets: Vec<TrendBucket> = vec![];
    let mut next_bucket: i64 = 0;
    for row in stmt.query_map(rusqlite::params![since, drv], |r| {
        Ok((r.get::<_, i64>(0)?, r.get::<_, i64>(1)?, r.get::<_, i64>(2)?, r.get::<_, i64>(3)?))
    })?.filter_map(|r| r.ok()) {
        let (ts, etype, dur, bytes) = row;
        assert!(ts >= 0);
        if ts >= next_bucket {
            buckets.push(TrendBucket {
                bucket: bucket_label(ts, bucket),
                build_count: 0,
                build_total_ms: 0,
                subst_count: 0,
                subst_total_ms: 0,
                download_bytes: 0,
                build_durations: vec![],
                subst_durations: vec![],
            });
            next_bucket = bucket_end(ts, bucket);
        }
        let last = buckets.last_mut().unwrap();
        match etype {
            105 => {
                assert!(dur >= 0);
                last.build_count += 1;
                last.build_total_ms += dur;
                last.build_durations.push(dur);
            }
            108 => {
                assert!(dur >= 0);
                last.subst_count += 1;
                last.subst_total_ms += dur;
                last.subst_durations.push(dur);
            }
            101 => {
                assert!(bytes >= 0);
                last.download_bytes += bytes;
            }
            _ => {}
        }
    }
    Ok(buckets)
}
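// A minimal sketch of the events-path bucketing against an in-memory database.
// The events schema and the idx_events_start_cover definition here are
// assumptions modelled on the columns and the covering-index comment above,
// not the daemon's real migrations.
#[cfg(test)]
mod trend_events_tests {
    use super::*;

    #[test]
    fn rows_split_into_day_buckets() {
        let conn = Connection::open_in_memory().unwrap();
        conn.execute_batch(
            "CREATE TABLE events (start_time INTEGER, event_type INTEGER, duration_ms INTEGER,
                                  total_bytes INTEGER, drv_path TEXT, cache_url TEXT, text TEXT);
             CREATE INDEX idx_events_start_cover
                 ON events(start_time, event_type, duration_ms, total_bytes);
             INSERT INTO events VALUES (100,   105, 2000, 0, '/nix/store/a.drv', NULL, NULL);
             INSERT INTO events VALUES (200,   108, 50,   0, '/nix/store/a.drv', NULL, NULL);
             INSERT INTO events VALUES (90000, 105, 3000, 0, '/nix/store/b.drv', NULL, NULL);",
        ).unwrap();

        let buckets = trend_from_events(&conn, None, &BucketSize::Day, None).unwrap();
        assert_eq!(buckets.len(), 2); // 90000 ≥ 86400 opens the second day bucket
        assert_eq!(buckets[0].build_count, 1);
        assert_eq!(buckets[0].subst_count, 1);
        assert_eq!(buckets[0].build_durations, vec![2000]);
        assert_eq!(buckets[1].bucket, "1970-01-02");
        assert_eq!(buckets[1].build_count, 1);
    }
}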
// daily_stats query. O(days) — does not populate build_durations/subst_durations.
// Closed days come from the table; today's partial data is merged from the in-memory
// snapshot so the current day is always included when running via the daemon.
fn trend_from_daily_stats(
    conn: &Connection,
    since: Option<i64>,
    bucket: &BucketSize,
    today: Option<TodaySummary>,
) -> Result<Vec<TrendBucket>> {
    let since_day: Option<i64> = since.map(|ts| ts / 86400);
    let today_day = today.as_ref().map(|t| t.day)
        .unwrap_or_else(|| Utc::now().timestamp() / 86400);

    let mut stmt = conn.prepare(
        "SELECT day * 86400, event_type, count, total_ms, total_bytes FROM daily_stats
         WHERE event_type IN (101, 105, 108) AND (?1 IS NULL OR day >= ?1) AND day < ?2
         ORDER BY day ASC",
    ).context("Failed to prepare trend query")?;

    let mut buckets: Vec<TrendBucket> = vec![];
    let mut next_bucket: i64 = 0;
    for row in stmt.query_map(rusqlite::params![since_day, today_day], |r| {
        Ok((r.get::<_, i64>(0)?, r.get::<_, i64>(1)?, r.get::<_, i64>(2)?, r.get::<_, i64>(3)?, r.get::<_, i64>(4)?))
    })?.filter_map(|r| r.ok()) {
        let (ts, etype, count, total_ms, total_bytes) = row;
        assert!(ts >= 0);
        assert!(count >= 0);
        if ts >= next_bucket {
            buckets.push(TrendBucket {
                bucket: bucket_label(ts, bucket),
                build_count: 0,
                build_total_ms: 0,
                subst_count: 0,
                subst_total_ms: 0,
                download_bytes: 0,
                build_durations: vec![],
                subst_durations: vec![],
            });
            next_bucket = bucket_end(ts, bucket);
        }
        let last = buckets.last_mut().unwrap();
        match etype {
            105 => {
                last.build_count += count;
                last.build_total_ms += total_ms;
            }
            108 => {
                last.subst_count += count;
                last.subst_total_ms += total_ms;
            }
            101 => {
                last.download_bytes += total_bytes;
            }
            _ => {}
        }
    }

    if let Some(t) = today {
        if t.build_count > 0 || t.subst_count > 0 || t.download_bytes > 0 {
            let today_ts = t.day * 86400;
            if since.map_or(true, |s| today_ts >= s) {
                if today_ts >= next_bucket {
                    buckets.push(TrendBucket {
                        bucket: bucket_label(today_ts, bucket),
                        build_count: t.build_count,
                        build_total_ms: t.build_total_ms,
                        subst_count: t.subst_count,
                        subst_total_ms: t.subst_total_ms,
                        download_bytes: t.download_bytes,
                        build_durations: vec![],
                        subst_durations: vec![],
                    });
                } else if let Some(last) = buckets.last_mut() {
                    // Today falls in the same week/month bucket as the last historical day.
                    last.build_count += t.build_count;
                    last.build_total_ms += t.build_total_ms;
                    last.subst_count += t.subst_count;
                    last.subst_total_ms += t.subst_total_ms;
                    last.download_bytes += t.download_bytes;
                }
            }
        }
    }
    Ok(buckets)
}

pub fn collect_trend(
    db: &Mutex<Connection>,
    since: Option<i64>,
    bucket: BucketSize,
    drv: Option<String>,
    today: Option<TodaySummary>,
    full: bool,
) -> Result<Trend> {
    if let Some(ref d) = drv {
        assert!(!d.is_empty(), "drv filter must not be empty");
    }
    let conn = db.lock().unwrap();

    // daily_stats path when individual samples are not needed, no drv filter is active
    // (daily_stats has no per-drv breakdown), and bucket granularity is at least a day
    // (daily_stats has no intra-day resolution).
    let buckets = if !full && drv.is_none() && !matches!(bucket, BucketSize::Hour) {
        trend_from_daily_stats(&conn, since, &bucket, today)?
    } else {
        trend_from_events(&conn, since, &bucket, drv.as_deref())?
    };
    for i in 1..buckets.len() {
        assert!(buckets[i].bucket > buckets[i - 1].bucket, "buckets must be strictly ascending");
    }
    Ok(Trend { buckets, bucket_size: bucket, drv_filter: drv })
}

pub fn display_trend(trend: &Trend) {
    let has_downloads = trend.drv_filter.is_none()
        && trend.buckets.iter().any(|b| b.download_bytes > 0);
    let bw = trend.bucket_size.col_width();
    if let Some(ref drv) = trend.drv_filter {
        println!("filter: {}", drv);
    }
    print!("{:<width$} {:>6} {:>10} {:>6} {:>10}",
           "period", "builds", "build avg", "subst", "subst avg", width = bw);
    if has_downloads {
        print!(" {:>8}", "dl (MB)");
    }
    println!();
    if trend.buckets.is_empty() {
        println!("(no data)");
        return;
    }
    for b in &trend.buckets {
        let build_avg = if b.build_count > 0 { b.build_total_ms / b.build_count } else { 0 };
        let subst_avg = if b.subst_count > 0 { b.subst_total_ms / b.subst_count } else { 0 };
        print!("{:<width$} {:>6} {:>10} {:>6} {:>10}",
               b.bucket, b.build_count, fmt_ms(build_avg), b.subst_count, fmt_ms(subst_avg),
               width = bw);
        if has_downloads {
            print!(" {:>8.1}", b.download_bytes as f64 / 1_048_576.0);
        }
        println!();
    }
}

fn print_test_section<F>(label: &str, buckets: &[TrendBucket], get_durs: F, bw: usize)
where
    F: Fn(&TrendBucket) -> &[i64],
{
    let any_data = buckets.iter().any(|b| !get_durs(b).is_empty());
    if !any_data {
        return;
    }
    println!("{}", label);
    println!("{:<width$} {:>5} {:>10} {:>7} {:>8}",
             "period", "n", "median", "Δ", "p-value", width = bw);
    for (i, b) in buckets.iter().enumerate() {
        let durs = get_durs(b);
        let mut sorted = durs.to_vec();
        sorted.sort_unstable();
        let med = if sorted.is_empty() { 0.0 } else { median_sorted(&sorted) };
        let (delta_str, p_str) = if i == 0 || get_durs(&buckets[i - 1]).is_empty() {
            (String::new(), String::new())
        } else {
            let prev = get_durs(&buckets[i - 1]);
            let mut prev_sorted = prev.to_vec();
            prev_sorted.sort_unstable();
            let prev_med = median_sorted(&prev_sorted);
            let delta = if prev_med > 0.0 {
                let pct = (med - prev_med) / prev_med * 100.0;
                format!("{}{:.0}%", if pct >= 0.0 { "+" } else { "" }, pct)
            } else {
                String::new()
            };
            let p = match mann_whitney_u(prev, durs) {
                Some(r) => format!("{:.3}", r.p_value),
                None => String::new(),
            };
            (delta, p)
        };
        println!("{:<width$} {:>5} {:>10} {:>7} {:>8}",
                 b.bucket, durs.len(), fmt_ms(med as i64), delta_str, p_str, width = bw);
    }
    println!();
}
pub fn display_trend_test(trend: &Trend) {
    let bw = trend.bucket_size.col_width();
    if let Some(ref drv) = trend.drv_filter {
        println!("filter: {}", drv);
    }
    print_test_section("builds", &trend.buckets, |b| &b.build_durations, bw);
    print_test_section("substitutions", &trend.buckets, |b| &b.subst_durations, bw);
    println!("Mann-Whitney U (two-tailed). H0: adjacent periods have the same duration distribution.");
}

pub fn output_csv_trend(trend: &Trend) {
    println!("period,build_count,build_avg_ms,subst_count,subst_avg_ms,download_bytes");
    for b in &trend.buckets {
        let build_avg = if b.build_count > 0 { b.build_total_ms / b.build_count } else { 0 };
        let subst_avg = if b.subst_count > 0 { b.subst_total_ms / b.subst_count } else { 0 };
        assert!(build_avg >= 0);
        assert!(subst_avg >= 0);
        println!("{},{},{},{},{},{}",
                 b.bucket, b.build_count, build_avg, b.subst_count, subst_avg, b.download_bytes);
    }
}

pub fn display_stats(stats: Stats) {
    let build_avg = if stats.build_count > 0 { stats.build_total_ms / stats.build_count } else { 0 };
    let subst_avg = if stats.subst_count > 0 { stats.subst_total_ms / stats.subst_count } else { 0 };
    let mb = stats.download_bytes as f64 / 1_048_576.0;
    let dl_speed = if stats.download_ms > 0 { mb / (stats.download_ms as f64 / 1000.0) } else { 0.0 };
    println!("{:<14} {:>6} total {:>9} avg {:>9}",
             "built", stats.build_count, fmt_ms(stats.build_total_ms), fmt_ms(build_avg));
    println!("{:<14} {:>6} total {:>9} avg {:>9}",
             "substituted", stats.subst_count, fmt_ms(stats.subst_total_ms), fmt_ms(subst_avg));
    println!("{:<14} {:>8.1} MB avg {:>6.1} MB/s", "downloaded", mb, dl_speed);

    if !stats.slowest_builds.is_empty() {
        let grouped = stats.slowest_builds.first().map(|b| b.count.is_some()).unwrap_or(false);
        println!();
        for row in &stats.slowest_builds {
            let path = row.drv_path.as_deref().or(row.text.as_deref()).unwrap_or("?");
            if grouped {
                println!("{:>9} ({:>3}x) {}", fmt_ms(row.duration_ms), row.count.unwrap_or(0), drv_name(path));
            } else {
                println!("{:>9} {}", fmt_ms(row.duration_ms), drv_name(path));
            }
        }
    }

    if !stats.cache_latency.is_empty() {
        let url_w = stats.cache_latency.iter().map(|r| r.cache_url.len()).max().unwrap_or(0);
        println!();
        for row in &stats.cache_latency {
            println!("{:<width$} {:>7} {:>6} queries",
                     row.cache_url, fmt_ms(row.avg_ms as i64), row.count, width = url_w);
        }
    }
}
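// Smoke tests for the rendering paths: they only assert that formatting a
// small hand-built Trend does not panic, plus the store-prefix strip in
// drv_name. All values are hypothetical.
#[cfg(test)]
mod display_tests {
    use super::*;

    #[test]
    fn drv_name_strips_store_prefix() {
        assert_eq!(drv_name("/nix/store/abc-foo.drv"), "abc-foo.drv");
        assert_eq!(drv_name("bare-name"), "bare-name");
    }

    #[test]
    fn rendering_does_not_panic() {
        let trend = Trend {
            buckets: vec![TrendBucket {
                bucket: "2024-03-10".into(),
                build_count: 2,
                build_total_ms: 3000,
                subst_count: 1,
                subst_total_ms: 50,
                download_bytes: 2_097_152,
                build_durations: vec![1000, 2000],
                subst_durations: vec![50],
            }],
            bucket_size: BucketSize::Day,
            drv_filter: None,
        };
        display_trend(&trend);
        display_trend_test(&trend);
        output_csv_trend(&trend);
    }
}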