Monorepo for Aesthetic.Computer aesthetic.computer
4
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 212 lines 7.6 kB view raw
1#!/usr/bin/env python3 2"""debug-composition.py — detect face + shirt-logo bboxes on chapter 3portraits and render debug stills so the slide layouter can avoid them. 4 5For each PNG in recap/out/jeffrey-photos/: 6 - detect face bbox via OpenCV Haar cascade (bundled, no external model) 7 - detect text regions in the chest band (below face) via tesseract OCR 8 - write recap/out/cv/<basename>.json with the bboxes 9 - write recap/out/debug/<basename>.png with overlaid colored boxes: 10 red = face (avoid) 11 yellow = shirt logo text (avoid) 12 cyan = recommended type-safe band (top + below shirt logos) 13 14Usage: 15 .venv/bin/python3 bin/debug-composition.py [photo_dir] [debug_dir] 16 (defaults: out/jeffrey-photos out/debug) 17""" 18 19from __future__ import annotations 20 21import json 22import os 23import sys 24from pathlib import Path 25 26import cv2 27import numpy as np 28import pytesseract 29from PIL import Image, ImageDraw, ImageFont 30 31 32ROOT = Path(__file__).resolve().parent.parent 33PHOTO_DIR = Path(sys.argv[1]) if len(sys.argv) > 1 else ROOT / "out" / "jeffrey-photos" 34DEBUG_DIR = Path(sys.argv[2]) if len(sys.argv) > 2 else ROOT / "out" / "debug" 35CV_DIR = ROOT / "out" / "cv" 36 37DEBUG_DIR.mkdir(parents=True, exist_ok=True) 38CV_DIR.mkdir(parents=True, exist_ok=True) 39 40# OpenCV's bundled Haar cascade — good enough for centered portrait shots. 41HAAR_PATH = Path(cv2.data.haarcascades) / "haarcascade_frontalface_default.xml" 42face_cascade = cv2.CascadeClassifier(str(HAAR_PATH)) 43if face_cascade.empty(): 44 raise RuntimeError(f"failed to load Haar cascade at {HAAR_PATH}") 45 46 47def detect_face(bgr: np.ndarray) -> tuple[int, int, int, int] | None: 48 """Return the largest face bbox (x, y, w, h) or None.""" 49 gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) 50 faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(120, 120)) 51 if len(faces) == 0: 52 return None 53 # Pick the largest by area (handles multi-jeffrey title slide too). 54 return tuple(max(faces, key=lambda f: f[2] * f[3]).tolist()) 55 56 57def detect_shirt_logos(bgr: np.ndarray, face: tuple | None) -> list[tuple[int, int, int, int]]: 58 """OCR the chest band (below the face, above mid-thigh) for printed 59 text regions. Returns list of bboxes in original image coords.""" 60 h, w = bgr.shape[:2] 61 if face is None: 62 # No face = guess chest band as 35–80% of frame height 63 y0, y1 = int(h * 0.35), int(h * 0.80) 64 else: 65 fx, fy, fw, fh = face 66 y0 = fy + fh # just below the chin 67 y1 = min(h, fy + fh + int(fh * 3.0)) # ~3 face-heights down 68 if y1 <= y0: 69 return [] 70 71 crop = bgr[y0:y1, :] 72 rgb = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB) 73 pil = Image.fromarray(rgb) 74 # Upscale 2x for better OCR on small shirt prints 75 pil = pil.resize((pil.width * 2, pil.height * 2), Image.LANCZOS) 76 data = pytesseract.image_to_data(pil, output_type=pytesseract.Output.DICT, config="--psm 11") 77 78 boxes: list[tuple[int, int, int, int]] = [] 79 for i, txt in enumerate(data["text"]): 80 if not txt or not txt.strip(): 81 continue 82 try: 83 conf = float(data["conf"][i]) 84 except (TypeError, ValueError): 85 continue 86 if conf < 50: 87 continue 88 # Need at least 2 alpha chars to count as a logo (skip noise glyphs) 89 if sum(c.isalpha() for c in txt) < 2: 90 continue 91 x = data["left"][i] // 2 92 y = data["top"][i] // 2 + y0 93 bw = data["width"][i] // 2 94 bh = data["height"][i] // 2 95 boxes.append((int(x), int(y), int(bw), int(bh))) 96 return merge_close_boxes(boxes) 97 98 99def merge_close_boxes(boxes: list[tuple[int, int, int, int]], gap: int = 30): 100 """Merge horizontally-adjacent OCR fragments into single logo bboxes.""" 101 if not boxes: 102 return [] 103 boxes = sorted(boxes, key=lambda b: (b[1] // 50, b[0])) 104 merged = [list(boxes[0])] 105 for b in boxes[1:]: 106 last = merged[-1] 107 if abs(b[1] - last[1]) < 25 and b[0] - (last[0] + last[2]) < gap: 108 x = min(last[0], b[0]) 109 y = min(last[1], b[1]) 110 x2 = max(last[0] + last[2], b[0] + b[2]) 111 y2 = max(last[1] + last[3], b[1] + b[3]) 112 last[:] = [x, y, x2 - x, y2 - y] 113 else: 114 merged.append(list(b)) 115 return [tuple(b) for b in merged] 116 117 118def safe_bands(face: tuple | None, logos: list, w: int, h: int): 119 """Return cyan bands that are clear of face + logos. 120 Currently: top band above face, bottom band below logos (or mid-thigh).""" 121 bands = [] 122 if face is None: 123 bands.append((0, 0, w, int(h * 0.18))) 124 bands.append((0, int(h * 0.85), w, int(h * 0.15))) 125 return bands 126 127 fx, fy, fw, fh = face 128 # Top band above face top 129 top_h = max(0, fy - 30) 130 if top_h > 60: 131 bands.append((0, 0, w, top_h)) 132 # Bottom band below the lowest logo (or below face+2*fh if no logos) 133 if logos: 134 lowest = max(b[1] + b[3] for b in logos) 135 else: 136 lowest = fy + fh + int(fh * 2.0) 137 if h - lowest > 80: 138 bands.append((0, lowest + 10, w, h - lowest - 10)) 139 return bands 140 141 142def draw_debug(bgr: np.ndarray, face, logos, bands, label: str, dst: Path): 143 pil = Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)).convert("RGB") 144 draw = ImageDraw.Draw(pil) 145 146 # Cyan safe bands first (semi-transparent fill via outline-only here) 147 for x, y, bw, bh in bands: 148 for off in range(3): 149 draw.rectangle([x + off, y + off, x + bw - off, y + bh - off], outline=(0, 220, 220)) 150 151 # Yellow shirt logos 152 for x, y, bw, bh in logos: 153 draw.rectangle([x, y, x + bw, y + bh], outline=(255, 220, 0), width=4) 154 155 # Red face 156 if face is not None: 157 x, y, bw, bh = face 158 draw.rectangle([x, y, x + bw, y + bh], outline=(255, 50, 80), width=5) 159 160 # Caption strip 161 cap = f"{label} face={'yes' if face else 'no'} logos={len(logos)} safe-bands={len(bands)}" 162 try: 163 font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 24) 164 except Exception: 165 font = ImageFont.load_default() 166 draw.rectangle([0, 0, pil.width, 36], fill=(20, 10, 20)) 167 draw.text((12, 4), cap, fill=(245, 247, 252), font=font) 168 169 pil.save(dst) 170 171 172def run(): 173 pngs = sorted(p for p in PHOTO_DIR.glob("*.png")) 174 if not pngs: 175 print(f"no PNGs in {PHOTO_DIR}", file=sys.stderr) 176 return 1 177 178 summary = [] 179 for png in pngs: 180 bgr = cv2.imread(str(png)) 181 if bgr is None: 182 print(f" ✗ skip (unreadable): {png.name}") 183 continue 184 h, w = bgr.shape[:2] 185 face = detect_face(bgr) 186 logos = detect_shirt_logos(bgr, face) 187 bands = safe_bands(face, logos, w, h) 188 189 cv_path = CV_DIR / f"{png.stem}.json" 190 cv_path.write_text(json.dumps({ 191 "image": png.name, 192 "size": [w, h], 193 "face": list(face) if face else None, 194 "shirtLogos": [list(b) for b in logos], 195 "safeBands": [list(b) for b in bands], 196 }, indent=2)) 197 198 dbg_path = DEBUG_DIR / f"{png.stem}.png" 199 draw_debug(bgr, face, logos, bands, png.stem, dbg_path) 200 201 summary.append((png.name, "face" if face else "no-face", len(logos), len(bands))) 202 print(f"{png.name}: face={'yes' if face else 'no'} logos={len(logos)} safe={len(bands)}") 203 204 print() 205 print(f"→ wrote {len(summary)} cv/*.json + debug/*.png") 206 print(f" {CV_DIR}") 207 print(f" {DEBUG_DIR}") 208 return 0 209 210 211if __name__ == "__main__": 212 sys.exit(run())