···18181919import numpy as np
2020from collections import deque
2121-from typing import Dict
2222-from scipy.stats import ks_2samp
2121+from typing import Dict, Tuple
def fast_ks_2samp(data1: np.ndarray, data2: np.ndarray) -> Tuple[float, float]:
    """
    Two-sample Kolmogorov-Smirnov test tuned for small streaming windows.

    Computes the KS statistic exactly (the largest absolute gap between the
    two empirical CDFs) and an approximate two-sided p-value.

    Args:
        data1: First sample; any array-like of real numbers (lists are
            accepted and flattened to 1-D).
        data2: Second sample; same requirements as ``data1``.

    Returns:
        ``(d, pval)`` as plain Python floats, where ``d`` is the KS statistic
        in ``[0, 1]`` and ``pval`` is the approximate p-value capped at 1.0.

    Raises:
        ValueError: If either sample is empty (the empirical CDF is
            undefined, and the original arithmetic would yield NaN).
    """
    sample1 = np.sort(np.asarray(data1, dtype=float).ravel())
    sample2 = np.sort(np.asarray(data2, dtype=float).ravel())
    n1 = sample1.size
    n2 = sample2.size
    if n1 == 0 or n2 == 0:
        raise ValueError("Data passed to fast_ks_2samp must not be empty")

    # The gap between two step-function CDFs can only peak at an observed
    # value, so evaluating both CDFs at the pooled observations is sufficient.
    pooled = np.concatenate([sample1, sample2])
    # side='right' counts elements <= x, i.e. the right-continuous ECDF.
    cdf1 = np.searchsorted(sample1, pooled, side='right') / n1
    cdf2 = np.searchsorted(sample2, pooled, side='right') / n2
    d = float(np.max(np.abs(cdf1 - cdf2)))

    # Effective sample size used to scale the statistic.
    en = np.sqrt(n1 * n2 / (n1 + n2))
    # Leading term of the asymptotic Kolmogorov series. It overshoots 1 for
    # small d, hence the cap below; it is tightest in the small-p tail.
    pval = 2.0 * np.exp(-2.0 * (en * d) ** 2)
    return d, min(float(pval), 1.0)
24412542class SlidingWindowDetector:
2643 """
···123140 cur_q = self.cur_windows[key]
124141 ref_q = self.ref_windows[key]
125142126126- # --- Anomaly test ---
143143+ # Anomaly test
127144 if len(cur_q) >= self.window_size and len(ref_q) >= 20:
128128- # Primary: KS distribution-shift test
129129- _, pval = ks_2samp(list(ref_q), list(cur_q))
145145+ # Primary: Fast KS distribution-shift test
146146+ _, pval = fast_ks_2samp(list(ref_q), list(cur_q))
130147 is_anomalous = pval < self.p_threshold
131148132149 if not is_anomalous:
profile.txt
This is a binary file and will not be displayed.
+1-1
scripts/benchmark.py
···619619620620 print("\nNote: good calibration means Mean Conf ≈ Actual Acc in each bin.")
621621622622- # ── convenience: cause_to_kwargs (static alias for calibration) ────
622622+ # convenience: cause_to_kwargs (static alias for calibration)
623623 @staticmethod
624624 def _cause_to_kwargs(cause: str, severity: float) -> dict:
625625 if cause == "solar_degradation":
+2-3
scripts/nasa_benchmark.py
···77Telemanom LSTM paper so results are directly comparable.
8899Evaluation Protocol (sequence-level, industry standard)
1010---------------------------------------------------------
1010+1111- True Positive (TP): at least one alarm fires inside an anomaly window.
1212- False Positive (FP): an alarm fires with no overlap to any anomaly window.
1313 Consecutive alarms in the same non-anomaly region count as ONE FP event.
···3333DATA_DIR = "smap&msl_dataset/data/data/test"
3434LABELS_PATH = "smap&msl_dataset/labeled_anomalies.csv"
35353636-# ── Baselines from published literature ─────────────────────────────────────
3636+# Baselines from published literature
3737LSTM_PRECISION = 0.851
3838LSTM_RECALL = 0.853
3939LSTM_F1 = 0.852
···119119 print(f" [{idx:3d}/{len(labels_df)}] {chan_id} — "
120120 f"TP={chan_tp}/{len(anomaly_seqs)} FP_events={chan_fp}")
121121122122- # ── Metrics ─────────────────────────────────────────────────────────────
123122 precision = tp_seqs / (tp_seqs + fp_events) if (tp_seqs + fp_events) > 0 else 0.0
124123 recall = tp_seqs / total_seqs if total_seqs > 0 else 0.0
125124 f1 = (2 * precision * recall / (precision + recall)
+3-5
scripts/streaming_benchmark.py
···55fixed-threshold alarm system for solar degradation faults.
6677Lead-Time Definition
88---------------------
88+99Lead time = t_threshold_alarm – t_aethelix_detection
10101111Where:
···1919(lead time = undefined / +∞ advantage).
20202121Methodology
2222------------
2222+232350 scenarios (seed=42), solar degradation 15–40% injected at T=6h.
2424Each sample is 10 seconds (0.1 Hz). Results in seconds.
2525"""
···100100 if deviation > THRESHOLD_FRACTION:
101101 t_threshold = t
102102103103- # Aethelix detection
104103 if t_aethelix is None:
105104 tick = {
106105 "solar_input" : solar_val,
···127126 aethelix_miss += 1
128127 lead_s = None
129128 elif t_threshold is None:
130130- # Threshold never fired — Aethelix-only detection (infinite advantage)
131129 threshold_miss += 1
132130 lead_s = None # handled separately
133131 else:
134132 lead_s = (t_threshold - t_aethelix) * dt_per_sample
135133 lead_times_s.append(lead_s)
136134137137- # ── Summary ─────────────────────────────────────────────────────────────
135135+138136 if lead_times_s:
139137 mean_lead = np.mean(lead_times_s)
140138 median_lead = np.median(lead_times_s)
+3-3
scripts/subthreshold_benchmark.py
···22Sub-threshold fault detection benchmark.
3344Evaluates Aethelix's ability to detect faults below the operational 15% alarm
55-threshold — the regime where traditional alarm systems fail by design.
55+threshold - the regime where traditional alarm systems fail by design.
6677Fault Severity Range: 5–12% degradation of solar input power.
88 - Traditional threshold alarm: 0% detection (misses by design).
···1010 - Aethelix causal: see measured result below.
11111212Methodology
1313------------
1313+1414100 reproducible scenarios (seed=42) injected at T+6h with solar degradation
1515drawn from Uniform(0.05, 0.12). Detection is confirmed when Aethelix produces
1616a hypothesis with confidence ≥ 40% (meaningful, not trivial) for
···100100 print(f" Scenario {i+1:3d}/{num_scenarios} | "
101101 f"Detected so far: {detected_count}")
102102103103- # ── False-positive rate (clean data) ────────────────────────────────────
103103+ # False-positive rate (clean data)
104104 fp_count = 0
105105 for _ in range(30):
106106 sim = PowerSimulator(duration_hours=24, sampling_rate_hz=0.1)