pop/timeline: visual-score debug PNG (AC dark + monospace)

+303

1 changed file

expand all

pop

bin

timeline.py

+303

pop/bin/timeline.py

#!/usr/bin/env python3
# timeline.py — visual-score PNG: storyboard expectations vs whisper
# utterances vs pitchsnap autotune marks vs actual audio waveform.
# eye-validation tool for checking when words are SUPPOSED to fire,
# when whisper THOUGHT they fired (in the source vocal), where
# pitchsnap MOVED them, and where the actual audio peaks land.
#
# style: aesthetic.computer dark — black bg, cream type, saturated
# accent per panel, chunky monospace, big enough to read across the
# room. axis labels are alternated/rotated to avoid vertical overlap.
#
# four panels, top → bottom, sharing the time axis:
# 1. piano roll — one bar per slide (note × time), lyric LARGE inside.
# 2. utterances — whisper word windows.
# 3. autotune — arrows naturalStart → snappedStart (zigzag labels).
# 4. waveform — audio + librosa onsets vs storyboard slide.starts.
#
# usage:
# .venv/bin/python bin/timeline.py \
# --storyboard big-pictures/out/amazing.storyboard.json \
# --audio big-pictures/out/amazing-final.mp3 \
# [--words ...] [--events ...] \
# --out ~/Desktop/amazing-timing.png

import argparse
import json
import os
import re
import sys

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.patheffects as pe
# NOTE: Normalize / ScalarMappable are not referenced below; the imports
# are kept so nothing that relies on them being importable here breaks.
from matplotlib.colors import Normalize
from matplotlib.cm import ScalarMappable

# ── AC PALETTE ─────────────────────────────────────────────────────
BG = "#0a0a14"        # ink-black, slight blue tilt
PANEL_BG = "#10101e"  # alternating panel back
GRID = "#1f1f30"      # subtle grid lines
FG = "#f3f0d8"        # cream type
DIM = "#7f7d68"       # secondary type
ACCENT = "#5fe8b8"    # mint — primary accent (bell trigger, slide.start)
GREEN = "#7fe070"     # whisper utterance band
ORANGE = "#ff8a3d"    # score expected
CYAN = "#5fd0ff"      # waveform / actual
PURPLE = "#c87dff"    # secondary
RED = "#ff5566"       # autotune up
BLUE = "#5588ff"      # autotune down

# ── OUTPUT SIZE ────────────────────────────────────────────────────
SAVE_DPI = 120
# Width cap in inches. The figure grows with the audio duration, and the
# raster backend refuses images of 2**16 px or more per side; 500 in at
# SAVE_DPI stays below that with room for the "tight" bbox padding.
MAX_FIG_W_IN = 500

# Monospace stack — matplotlib will resolve in order
plt.rcParams.update({
    "figure.facecolor": BG,
    "axes.facecolor": PANEL_BG,
    "axes.edgecolor": DIM,
    "axes.labelcolor": FG,
    "axes.titlecolor": FG,
    "xtick.color": FG,
    "ytick.color": FG,
    "text.color": FG,
    "font.family": "monospace",
    "font.monospace": ["Menlo", "DejaVu Sans Mono", "Consolas", "Courier New"],
    "font.weight": "bold",
    "axes.labelweight": "bold",
    "axes.titleweight": "bold",
    "axes.linewidth": 1.4,
    "xtick.major.width": 1.4,
    "ytick.major.width": 1.4,
    "xtick.major.size": 6,
    "ytick.major.size": 6,
})

NOTE_BASE = {"C":0,"C#":1,"DB":1,"D":2,"D#":3,"EB":3,"E":4,"F":5,"F#":6,
             "GB":6,"G":7,"G#":8,"AB":8,"A":9,"A#":10,"BB":10,"B":11}

# Optional trailing octave: any number of digits with an optional minus
# sign, so "C10" and "C-1" parse as well as "C4".
_OCTAVE_SUFFIX = re.compile(r"-?\d+$")


def note_to_midi(n):
    """Convert a note name such as "C4", "Bb3" or "F#" to a MIDI number.

    The name is matched case-insensitively against NOTE_BASE (flats are
    spelled with a "b", e.g. "Bb"). A missing octave defaults to 4.

    Raises:
        KeyError: if the pitch-class part is not a key of NOTE_BASE.
    """
    s = n.strip().upper()
    suffix = _OCTAVE_SUFFIX.search(s)
    if suffix:
        name, octave = s[:suffix.start()], int(suffix.group())
    else:
        name, octave = s, 4
    return 12 * (octave + 1) + NOTE_BASE[name]


def midi_label(m):
    """Return the sharp-spelled note name (e.g. "C#4") for MIDI number m."""
    octave = m // 12 - 1
    pitch = ["C","C#","D","D#","E","F","F#","G","G#","A","A#","B"][m % 12]
    return f"{pitch}{octave}"


def stroke(width=3, fg=BG):
    """text path-effect for max contrast outline"""
    return [pe.withStroke(linewidth=width, foreground=fg)]


def _load_json(path):
    """Parse the JSON file at path, closing the handle afterwards."""
    # JSON is UTF-8 by spec; don't depend on the platform locale encoding.
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


def _first_existing(candidates):
    """Return the first path in candidates that exists, or None."""
    return next((c for c in candidates if os.path.exists(c)), None)


def _load_audio(path):
    """Load audio and detect onsets; returns (y, sr, onsets).

    Best effort by design: if librosa is missing or cannot decode the
    file, the failure is printed and (None, None, []) is returned so the
    remaining panels can still be rendered.
    """
    try:
        import librosa
        y, sr = librosa.load(path, sr=22050, mono=True)
        onsets = librosa.onset.onset_detect(y=y, sr=sr, units="time")
        print(f" {len(onsets)} onsets")
        return y, sr, onsets
    except Exception as e:  # deliberate catch-all, see docstring
        print(f" ! librosa failed: {e}")
        return None, None, []


def _style_time_axis(ax):
    """Apply the x tick / grid styling shared by the upper panels."""
    ax.tick_params(axis="x", labelsize=12, length=5)
    ax.grid(axis="x", color=GRID, linestyle="-", linewidth=0.7)
    ax.set_axisbelow(True)


def _draw_piano_roll(ax, slides, total, fig_w):
    """Panel 1: one rounded bar per slide at (time, pitch), lyric inside."""
    midis = [note_to_midi(s["note"]) for s in slides]
    midi_min, midi_max = min(midis) - 1, max(midis) + 1
    # alternating pitch lanes
    for m in range(midi_min, midi_max + 1):
        if m % 2 == 0:
            ax.axhspan(m - 0.5, m + 0.5, color=BG, zorder=0)
    cmap = plt.get_cmap("plasma")
    last = max(1, len(slides) - 1)
    for i, (s, m) in enumerate(zip(slides, midis)):
        x, w = s["start"], s["end"] - s["start"]
        col = cmap((i / last) * 0.85 + 0.05)
        ax.add_patch(mpatches.FancyBboxPatch(
            (x, m - 0.42), w, 0.84,
            boxstyle="round,pad=0.02,rounding_size=0.05",
            linewidth=1.2, edgecolor=FG, facecolor=col, zorder=2,
        ))
        # lyric INSIDE bar — auto-shrink for tight ones
        bar_px_estimate = (w / total * fig_w * 80) if total > 0 else 0.0
        n_chars = len(s["text"])
        text_size = 22 if n_chars <= 3 else (18 if n_chars <= 5 else 15)
        if bar_px_estimate < 60:
            text_size = max(11, text_size - 6)
        ax.text(x + w / 2, m, s["text"],
                ha="center", va="center", fontsize=text_size,
                fontweight="bold", color=FG, zorder=4,
                path_effects=stroke(3))
        # bell trigger tick
        ax.plot([x, x], [midi_min - 0.5, m - 0.42],
                color=ACCENT, linewidth=1.3, alpha=0.55, zorder=3)
    lanes = range(midi_min, midi_max + 1)
    ax.set_yticks(lanes)
    ax.set_yticklabels([midi_label(m) for m in lanes], fontsize=13)
    ax.set_ylim(midi_min - 0.6, midi_max + 0.6)
    ax.set_ylabel("SCORE", fontsize=18, labelpad=14, color=ACCENT)
    _style_time_axis(ax)
    # empty plot: only exists to give the legend a "bell trigger" entry
    ax.plot([], [], color=ACCENT, linewidth=2.5, label="bell trigger")
    ax.legend(loc="upper right", fontsize=13, frameon=True,
              facecolor=PANEL_BG, edgecolor=DIM, labelcolor=FG)


def _draw_utterances(ax, words):
    """Panel 2: one translucent box per word spanning fromMs..toMs."""
    for word in words:
        x0 = word["fromMs"] / 1000.0
        x1 = word["toMs"] / 1000.0
        ax.add_patch(mpatches.FancyBboxPatch(
            (x0, 0.18), x1 - x0, 0.64,
            boxstyle="round,pad=0.01,rounding_size=0.04",
            facecolor=GREEN, alpha=0.35, edgecolor=GREEN,
            linewidth=1.4, zorder=2))
        ax.text((x0 + x1) / 2, 0.5, word["text"],
                ha="center", va="center", fontsize=15,
                fontweight="bold", color=FG, zorder=3,
                path_effects=stroke(3))
    ax.set_yticks([])
    ax.set_ylim(0, 1)
    ax.set_ylabel("HEARD", fontsize=18, labelpad=14, color=GREEN)
    _style_time_axis(ax)


def _draw_autotune(ax, events):
    """Panel 3: an arrow naturalStart → snappedStart per event."""
    for i, e in enumerate(events):
        nat = float(e.get("naturalStart", 0))
        snap = float(e.get("snappedStart", nat))
        st = float(e.get("semitones", 0))
        # color: blue=down, red=up, dim if near zero
        col = BLUE if st < -0.5 else (RED if st > 0.5 else DIM)
        ax.annotate(
            "", xy=(snap, 0.5), xytext=(nat, 0.5),
            arrowprops=dict(arrowstyle="-|>", color=col, lw=2.0,
                            shrinkA=0, shrinkB=0, mutation_scale=18),
            zorder=3,
        )
        ax.plot(nat, 0.5, "o", color=col, markersize=8, zorder=4,
                markeredgecolor=FG, markeredgewidth=1.0)
        ax.plot(snap, 0.5, "s", color=col, markersize=10, zorder=5,
                markeredgecolor=FG, markeredgewidth=1.0)
        # ZIGZAG label heights to dodge horizontal collisions
        outer = i % 2 == 0
        tag_y, st_y = (0.86, 0.14) if outer else (0.72, 0.28)
        tag = e.get("targetNote", "")
        ax.text(snap, tag_y, tag,
                ha="center", va="bottom" if outer else "top",
                fontsize=13, fontweight="bold", color=FG, zorder=5,
                path_effects=stroke(3))
        ax.text(snap, st_y, f"{st:+.0f}st",
                ha="center", va="top" if outer else "bottom",
                fontsize=11, color=col, zorder=5,
                path_effects=stroke(2))
    ax.set_yticks([])
    ax.set_ylim(0, 1)
    ax.set_ylabel("PITCH SNAP", fontsize=18, labelpad=14, color=PURPLE)
    _style_time_axis(ax)
    ax.text(0.995, 0.96,
            "● natural ■ snapped blue▼ down red▲ up",
            transform=ax.transAxes, ha="right", va="top",
            fontsize=12, color=FG,
            bbox=dict(facecolor=BG, edgecolor=DIM,
                      boxstyle="round,pad=0.4"))


def _draw_waveform(ax, y, sr, slides, onsets, total):
    """Panel 4: waveform plus expected slide starts and detected onsets."""
    if y is not None:
        t = np.arange(len(y)) / sr
        ax.plot(t, y, color=CYAN, linewidth=0.7, alpha=0.9, zorder=2)
        ax.fill_between(t, 0, y, color=CYAN, alpha=0.20, zorder=2)
    for s in slides:
        ax.axvline(s["start"], color=ORANGE, linewidth=1.2, alpha=0.65, zorder=3)
    for ot in onsets:
        ax.axvline(ot, color=GREEN, linewidth=1.2, alpha=0.65, zorder=4)
    # empty plots: legend handles only (axvline carries no label above)
    ax.plot([], [], color=ORANGE, linewidth=2.5, label="EXPECTED slide.start")
    ax.plot([], [], color=GREEN, linewidth=2.5, label="ACTUAL onset")
    ax.legend(loc="upper right", fontsize=13, frameon=True,
              facecolor=PANEL_BG, edgecolor=DIM, labelcolor=FG)
    # the x axis is shared, so this limit applies to every panel
    ax.set_xlim(0, total)
    ax.set_ylim(-1.05, 1.05)
    ax.set_xlabel("TIME (s)", fontsize=16, labelpad=10)
    ax.set_ylabel("AUDIO", fontsize=18, labelpad=14, color=CYAN)
    ax.tick_params(axis="x", labelsize=13, length=6)
    ax.tick_params(axis="y", labelsize=11)
    ax.grid(axis="x", color=GRID, linestyle="-", linewidth=0.7)
    ax.set_axisbelow(True)


def main():
    """Parse CLI arguments, load all sources and write the timeline PNG.

    --words / --events are optional; when omitted, a fixed list of
    candidate file names next to the audio file is probed. The utterance
    and autotune panels are only drawn when their data is present.
    Exits with an error message when the storyboard has no slides.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--storyboard", required=True)
    ap.add_argument("--audio", required=True)
    ap.add_argument("--words", default=None)
    ap.add_argument("--events", default=None)
    ap.add_argument("--out", required=True)
    ap.add_argument("--title", default=None)
    args = ap.parse_args()

    sb = _load_json(args.storyboard)
    slides = sb["slides"]
    if not slides:
        # the piano roll derives its pitch range from the slides
        sys.exit(f" ! {args.storyboard}: no slides to draw")
    # `or` also skips null / 0 values, not only missing keys
    total = float(sb.get("audioDuration") or sb.get("duration") or 0)
    bpm = sb.get("bpm", 100)
    slug = sb.get("slug", "?")

    # ── auto-find optional sources ───────────────────────────────────
    audio_dir = os.path.dirname(os.path.abspath(args.audio))
    audio_stem = os.path.splitext(os.path.basename(args.audio))[0]
    if args.words is None:
        args.words = _first_existing((
            f"{audio_dir}/{audio_stem}-words.json",
            f"{audio_dir}/{slug}-perline-words.json",
            f"{audio_dir}/{slug}-7-warm-words.json",
            f"{audio_dir}/{slug}-warm-words.json",
            f"{audio_dir}/{slug}-vocal-words.json",
            f"{audio_dir}/{slug}-sung-words.json",
        ))
        if args.words:
            print(f" words: {args.words}")
    if args.events is None:
        args.events = _first_existing((
            f"{audio_dir}/{audio_stem}.events.json",
            f"{audio_dir}/{slug}-beat.events.json",
        ))
        if args.events:
            print(f" events: {args.events}")

    words = []
    if args.words and os.path.exists(args.words):
        words = _load_json(args.words)
    if words:
        print(f" {len(words)} whisper words")
    events = []
    if args.events and os.path.exists(args.events):
        ev = _load_json(args.events)
        # accept either a bare list or {"events": [...]}
        events = ev.get("events", []) if isinstance(ev, dict) else ev
        print(f" {len(events)} pitchsnap events")

    y, sr, onsets = _load_audio(args.audio)

    if total <= 0:
        # storyboard carries no usable duration: span the slides (and
        # the audio, when it loaded) instead of dividing by zero later
        total = max(float(s["end"]) for s in slides)
        if y is not None:
            total = max(total, len(y) / sr)
        print(f" ! storyboard has no duration — using {total:.1f}s")

    title = args.title or f"{slug.upper()} {bpm} BPM {total:.1f}s {len(slides)} slides"

    # ── figure layout — generous heights for readability ─────────────
    height_ratios = [5.0]              # piano roll (tallest)
    if words:
        height_ratios.append(1.6)      # utterances
    if events:
        height_ratios.append(2.4)      # autotune (zigzag needs space)
    height_ratios.append(2.8)          # waveform
    fig_w = min(MAX_FIG_W_IN, max(22, total * 0.85))
    fig_h = sum(height_ratios) * 1.15 + 0.6
    fig, axs = plt.subplots(
        len(height_ratios), 1, figsize=(fig_w, fig_h),
        gridspec_kw={"height_ratios": height_ratios, "hspace": 0.18},
        sharex=True
    )
    fig.suptitle(title, fontsize=28, fontweight="bold", color=FG, y=0.985,
                 path_effects=stroke(4))

    # panels are consumed top → bottom in the order they were sized
    panels = iter(np.atleast_1d(axs))
    _draw_piano_roll(next(panels), slides, total, fig_w)
    if words:
        _draw_utterances(next(panels), words)
    if events:
        _draw_autotune(next(panels), events)
    _draw_waveform(next(panels), y, sr, slides, onsets, total)

    # generous outer margin so suptitle / labels don't get clipped
    fig.tight_layout(rect=[0.02, 0.01, 0.99, 0.96])
    fig.savefig(args.out, dpi=SAVE_DPI, bbox_inches="tight",
                facecolor=BG, edgecolor="none")
    plt.close(fig)
    print(f" ✓ {args.out}")


if __name__ == "__main__":
    main()

Configure Feed

Configure Feed