Monorepo for Aesthetic.Computer
aesthetic.computer
1#!/usr/bin/env python3
2"""debug-composition.py — detect face + shirt-logo bboxes on chapter
3portraits and render debug stills so the slide layouter can avoid them.
4
For each PNG in recap/out/jeffrey-photos/:
  - detect face bbox via OpenCV Haar cascade (bundled, no external model)
  - detect text regions in the chest band (below face) via tesseract OCR
  - write recap/out/cv/<basename>.json with the bboxes
  - write recap/out/debug/<basename>.png with overlaid colored boxes:
      red    = face (avoid)
      yellow = shirt logo text (avoid)
      cyan   = recommended type-safe bands (above the face + below the shirt logos)

Usage:
  .venv/bin/python3 bin/debug-composition.py [photo_dir] [debug_dir]
  (defaults: out/jeffrey-photos out/debug; the JSON always goes to out/cv)
17"""
18
19from __future__ import annotations
20
21import json
22import os
23import sys
24from pathlib import Path
25
26import cv2
27import numpy as np
28import pytesseract
29from PIL import Image, ImageDraw, ImageFont
30
31
# Working directories. ROOT is two levels above this file; the photo and
# debug directories may be overridden by the first and second command-line
# arguments, while the JSON directory is always ROOT/out/cv.
ROOT = Path(__file__).resolve().parent.parent
PHOTO_DIR = (ROOT / "out" / "jeffrey-photos") if len(sys.argv) <= 1 else Path(sys.argv[1])
DEBUG_DIR = (ROOT / "out" / "debug") if len(sys.argv) <= 2 else Path(sys.argv[2])
CV_DIR = ROOT / "out" / "cv"

# Make sure both output directories exist before anything tries to write.
for _out_dir in (DEBUG_DIR, CV_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

# Frontal-face Haar cascade that ships inside the OpenCV package — good
# enough for centered portrait shots, and needs no extra model download.
HAAR_PATH = Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml"
face_cascade = cv2.CascadeClassifier(str(HAAR_PATH))
if face_cascade.empty():
    # The classifier reports empty when the XML was not loaded; stop early
    # instead of silently detecting nothing later on.
    raise RuntimeError(f"failed to load Haar cascade at {HAAR_PATH}")
45
46
def detect_face(bgr: np.ndarray) -> tuple[int, int, int, int] | None:
    """Locate the most prominent face in a BGR image.

    The module-level Haar cascade is run on a grayscale copy of *bgr*
    (detections below 120x120 px are excluded via ``minSize``).

    Returns:
        ``(x, y, w, h)`` of the detection with the largest area, or ``None``
        when the cascade finds no face.
    """
    grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    candidates = face_cascade.detectMultiScale(
        grayscale,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(120, 120),
    )
    # The result can be an array, so compare its length instead of relying
    # on truthiness.
    if len(candidates) == 0:
        return None

    def area(rect):
        return rect[2] * rect[3]

    # Largest by area wins (handles the multi-jeffrey title slide too).
    biggest = max(candidates, key=area)
    return tuple(biggest.tolist())
55
56
def detect_shirt_logos(bgr: np.ndarray, face: tuple | None) -> list[tuple[int, int, int, int]]:
    """OCR the chest band for printed text and return its bounding boxes.

    The chest band is a full-width horizontal strip of *bgr*:
      - with a face ``(x, y, w, h)``: from the bottom edge of the face box
        down three face-heights, clamped to the image height;
      - without a face: a guess of 35%-80% of the image height.

    The strip is enlarged before OCR, and every recognised word with
    confidence >= 50 and at least two alphabetic characters is kept. The
    surviving word boxes are passed through ``merge_close_boxes``.

    Args:
        bgr: image array in BGR channel order (height x width x channels).
        face: face box as ``(x, y, w, h)``, or ``None`` if none was found.

    Returns:
        List of ``(x, y, w, h)`` boxes in original-image coordinates; empty
        when the chest band has no area or nothing qualifies.
    """
    upscale = 2      # enlarge the crop for better OCR on small shirt prints
    min_conf = 50    # tesseract word confidence threshold
    min_alpha = 2    # fewer letters than this is treated as a noise glyph

    h, w = bgr.shape[:2]
    if face is None:
        # No face = guess chest band as 35–80% of frame height
        y0, y1 = int(h * 0.35), int(h * 0.80)
    else:
        _fx, fy, _fw, fh = face
        y0 = fy + fh                              # bottom edge of the face box
        y1 = min(h, fy + fh + int(fh * 3.0))      # ~3 face-heights down

    # Checked for BOTH branches: an empty strip (face touching the bottom
    # edge, or a degenerate image) must never reach OpenCV / PIL / tesseract.
    if y1 <= y0 or w == 0:
        return []

    crop = bgr[y0:y1, :]
    rgb = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
    pil = Image.fromarray(rgb)
    pil = pil.resize((pil.width * upscale, pil.height * upscale), Image.LANCZOS)
    # --psm 11 selects tesseract's sparse-text page segmentation mode.
    data = pytesseract.image_to_data(pil, output_type=pytesseract.Output.DICT, config="--psm 11")

    boxes: list[tuple[int, int, int, int]] = []
    for i, txt in enumerate(data["text"]):
        if not txt or not txt.strip():
            continue
        try:
            conf = float(data["conf"][i])
        except (TypeError, ValueError):
            # Confidence value is not numeric; skip the entry.
            continue
        if conf < min_conf:
            continue
        if sum(c.isalpha() for c in txt) < min_alpha:
            continue
        # Undo the upscale and shift the row back into full-image coordinates.
        x = data["left"][i] // upscale
        y = data["top"][i] // upscale + y0
        bw = data["width"][i] // upscale
        bh = data["height"][i] // upscale
        boxes.append((int(x), int(y), int(bw), int(bh)))
    return merge_close_boxes(boxes)
97
98
def merge_close_boxes(
    boxes: list[tuple[int, int, int, int]], gap: int = 30
) -> list[tuple[int, int, int, int]]:
    """Merge horizontally-adjacent OCR fragments into single logo bboxes.

    Boxes are ordered by coarse row (``y // 50``) and then by ``x``. Each
    box is compared with the most recently emitted one and absorbed into it
    when their top edges differ by less than 25 px and their horizontal
    extents are closer than *gap* px (overlapping extents always qualify).

    Args:
        boxes: ``(x, y, w, h)`` boxes; the input list is not modified.
        gap: maximum horizontal distance, in pixels, for two boxes to merge.

    Returns:
        A new list of ``(x, y, w, h)`` tuples; empty if *boxes* is empty.
    """
    if not boxes:
        return []

    ordered = sorted(boxes, key=lambda b: (b[1] // 50, b[0]))
    merged = [list(ordered[0])]
    for x, y, bw, bh in ordered[1:]:
        last = merged[-1]
        last_right = last[0] + last[2]
        same_row = abs(y - last[1]) < 25
        # Signed distance between the horizontal extents (negative when they
        # overlap). It is measured in both directions: the first box of a new
        # row bucket can sit far to the LEFT of the previous box, and a
        # one-sided "x - last_right" would be hugely negative there and merge
        # two unrelated fragments across the whole image.
        h_dist = max(x - last_right, last[0] - (x + bw))
        if same_row and h_dist < gap:
            left = min(last[0], x)
            top = min(last[1], y)
            right = max(last_right, x + bw)
            bottom = max(last[1] + last[3], y + bh)
            last[:] = [left, top, right - left, bottom - top]
        else:
            merged.append([x, y, bw, bh])
    return [tuple(b) for b in merged]
116
117
def safe_bands(face: tuple | None, logos: list, w: int, h: int):
    """Compute the full-width bands that stay clear of the face and logos.

    Args:
        face: face box ``(x, y, w, h)`` or ``None``.
        logos: logo boxes as ``(x, y, w, h)``; may be empty.
        w: image width in pixels.
        h: image height in pixels.

    Returns:
        A list of ``(x, y, w, h)`` bands. With no face, two fixed bands are
        returned (top 18% and bottom 15% of the frame). With a face, there is
        an optional band above it and an optional band below the lowest logo
        (or below two face-heights under the face when there are no logos).
    """
    if face is None:
        return [
            (0, 0, w, int(h * 0.18)),
            (0, int(h * 0.85), w, int(h * 0.15)),
        ]

    _fx, face_top, _fw, face_h = face
    result = []

    # Above the face: keep a 30 px margin, and only bother if more than
    # 60 px remain.
    headroom = max(0, face_top - 30)
    if headroom > 60:
        result.append((0, 0, w, headroom))

    # Below the torso: start from the bottom of the lowest logo, or from two
    # face-heights under the face box when no logo was detected.
    if logos:
        floor = max(box[1] + box[3] for box in logos)
    else:
        floor = face_top + face_h + int(face_h * 2.0)

    remaining = h - floor
    if remaining > 80:
        # Leave a 10 px gap under the logo / torso.
        result.append((0, floor + 10, w, remaining - 10))
    return result
140
141
def draw_debug(bgr: np.ndarray, face, logos, bands, label: str, dst: Path):
    """Render the debug still for one photo and save it to *dst*.

    Draws, in order: cyan outlines for the safe bands, yellow outlines for
    the logo boxes, a red outline for the face (if any), and a dark caption
    strip across the top summarising what was found.

    Args:
        bgr: source image in BGR channel order.
        face: face box ``(x, y, w, h)`` or ``None``.
        logos: logo boxes as ``(x, y, w, h)``.
        bands: safe bands as ``(x, y, w, h)``.
        label: text shown at the start of the caption.
        dst: output path handed to ``Image.save``.
    """
    canvas = Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)).convert("RGB")
    pen = ImageDraw.Draw(canvas)

    # Safe bands: three nested 1 px outlines make a 3 px cyan border
    # (outline only, no fill).
    cyan = (0, 220, 220)
    for left, top, bw, bh in bands:
        right, bottom = left + bw, top + bh
        for inset in (0, 1, 2):
            pen.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=cyan,
            )

    # Shirt logos in yellow.
    for left, top, bw, bh in logos:
        pen.rectangle([left, top, left + bw, top + bh], outline=(255, 220, 0), width=4)

    # Face in red.
    if face is not None:
        left, top, bw, bh = face
        pen.rectangle([left, top, left + bw, top + bh], outline=(255, 50, 80), width=5)

    # Caption font: try the macOS Helvetica file, otherwise use Pillow's
    # built-in default so the script still runs elsewhere.
    try:
        font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 24)
    except Exception:
        font = ImageFont.load_default()

    # Caption strip across the top of the image.
    cap = f"{label} face={'yes' if face else 'no'} logos={len(logos)} safe-bands={len(bands)}"
    pen.rectangle([0, 0, canvas.width, 36], fill=(20, 10, 20))
    pen.text((12, 4), cap, fill=(245, 247, 252), font=font)

    canvas.save(dst)
170
171
def run():
    """Process every ``*.png`` in PHOTO_DIR and write its JSON + debug still.

    For each readable image the face, shirt logos and safe bands are
    computed, saved as ``CV_DIR/<stem>.json`` and rendered to
    ``DEBUG_DIR/<stem>.png``. Unreadable files are reported and skipped.

    Returns:
        1 if PHOTO_DIR contains no PNGs, otherwise 0 (usable as an exit code).
    """
    photos = sorted(PHOTO_DIR.glob("*.png"))
    if not photos:
        print(f"no PNGs in {PHOTO_DIR}", file=sys.stderr)
        return 1

    written = 0
    for photo in photos:
        image = cv2.imread(str(photo))
        if image is None:
            # imread signals a decode failure with None rather than raising.
            print(f" ✗ skip (unreadable): {photo.name}")
            continue

        height, width = image.shape[:2]
        face = detect_face(image)
        logos = detect_shirt_logos(image, face)
        bands = safe_bands(face, logos, width, height)

        # Machine-readable result for the slide layouter.
        payload = {
            "image": photo.name,
            "size": [width, height],
            "face": list(face) if face else None,
            "shirtLogos": [list(b) for b in logos],
            "safeBands": [list(b) for b in bands],
        }
        (CV_DIR / f"{photo.stem}.json").write_text(json.dumps(payload, indent=2))

        # Human-readable overlay.
        draw_debug(image, face, logos, bands, photo.stem, DEBUG_DIR / f"{photo.stem}.png")

        written += 1
        print(f" ✓ {photo.name}: face={'yes' if face else 'no'} logos={len(logos)} safe={len(bands)}")

    print()
    print(f"→ wrote {written} cv/*.json + debug/*.png")
    print(f" {CV_DIR}")
    print(f" {DEBUG_DIR}")
    return 0
209
210
# Script entry point: the return value of run() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(run())