my over complex system configurations dotfiles.isabelroses.com/
nixos nix flake dotfiles linux
9
fork

Configure Feed

Select the types of activity you want to include in your feed.

ci(eval-diff): add significance

isabel bd1e56b1 32a0a9a8

+29 -3
+29 -3
.github/workflows/diff.yml
···
153 153   def fmt(n):
154 154       return f'{n:.3f}' if isinstance(n, float) else f'{int(n):,}'
155 155
    156 + def is_significant(before_runs, after_runs, key, threshold=0.05):
    157 +     """Welch's t-test to determine if the difference is significant."""
    158 +     import math
    159 +     b_vals = [r[key] for r in before_runs if key in r]
    160 +     a_vals = [r[key] for r in after_runs if key in r]
    161 +     n_b, n_a = len(b_vals), len(a_vals)
    162 +     if n_b < 2 or n_a < 2:
    163 +         return False
    164 +     mean_b = statistics.mean(b_vals)
    165 +     mean_a = statistics.mean(a_vals)
    166 +     var_b = statistics.variance(b_vals)
    167 +     var_a = statistics.variance(a_vals)
    168 +     se = math.sqrt(var_b / n_b + var_a / n_a)
    169 +     if se == 0:
    170 +         return mean_a != mean_b
    171 +     t_stat = abs(mean_a - mean_b) / se
    172 +     # Approximate p-value using degrees of freedom via Welch-Satterthwaite
    173 +     num = (var_b / n_b + var_a / n_a) ** 2
    174 +     denom = (var_b / n_b) ** 2 / (n_b - 1) + (var_a / n_a) ** 2 / (n_a - 1)
    175 +     df = num / denom if denom > 0 else 1
    176 +     # Conservative t-critical values for two-tailed p<0.05
    177 +     # For df>=4 (our case with 5 runs each), t_crit ~ 2.78 (df=4) to 2.31 (df=8)
    178 +     t_crit = 2.78 if df <= 4 else 2.45 if df <= 6 else 2.31
    179 +     return t_stat > t_crit
    180 +
156 181   lines = [
157     -     '| Metric | Before (mean ± σ) | After (mean ± σ) | Δ | % |',
158     -     '|--------|-------------------|------------------|---|---|',
    182 +     '| Metric | Before (mean +/- σ) | After (mean +/- σ) | Δ | % | Sig? |',
    183 +     '|--------|---------------------|--------------------|----|---|------|',
159 184   ]
160 185   for key in all_keys:
161 186       b = before.get(key, 0)
···
165 190       diff = a - b
166 191       pct = f'{diff / b * 100:+.1f}%' if b != 0 else 'N/A'
167 192       sign = '+' if diff > 0 else ''
168     -     lines.append(f'| `{key}` | {fmt(b)} ± {fmt(bs)} | {fmt(a)} ± {fmt(as_)} | {sign}{fmt(diff)} | {pct} |')
    193 +     sig = 'Yes' if is_significant(before_runs, after_runs, key) else ''
    194 +     lines.append(f'| `{key}` | {fmt(b)} ± {fmt(bs)} | {fmt(a)} ± {fmt(as_)} | {sign}{fmt(diff)} | {pct} | {sig} |')
169 195
170 196   table = '\n'.join(lines)
171 197   with open('stats-table.md', 'w') as f: