this repo has no description
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add dark-mode inversion augmentation for handwriting splits

Inverts each handwriting image with probability 0.5 during training-time
augmentation to expose the model to dark-background variants. Gated to
_HANDWRITING_SPLITS only -- typeset splits are excluded because emojis that
differ only by color (colored circles/squares) lose discriminative
information under RGB inversion.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

+17 -3
+17 -3
src/data.py
··· 17 17 18 18 import numpy as np 19 19 from torch.utils.data import Dataset 20 - from PIL import Image, ImageDraw, ImageFilter 20 + from PIL import Image, ImageDraw, ImageFilter, ImageOps 21 21 from torchvision.transforms import ElasticTransform, RandomPerspective 22 22 import torchvision.transforms.functional as TF 23 23 ··· 53 53 "mathwriting_train", "mathwriting_synthetic", "mathwriting_symbols", 54 54 "mathwriting_val", "mathwriting_test", 55 55 "typeset_train", "typeset_val", "typeset_test", 56 + } 57 + 58 + # Handwriting splits: black-ink-on-white images safe to invert for dark-mode augmentation. 59 + # Typeset splits are excluded because color-only-distinct emojis lose meaning under inversion. 60 + _HANDWRITING_SPLITS = { 61 + "crohme_gen_2019", "crohme_gen_2023", "crohme_gen_syntactic", "crohme_real_train", 62 + "mathwriting_train", "mathwriting_synthetic", "mathwriting_symbols", 63 + "mathwriting_val", "mathwriting_test", 56 64 } 57 65 58 66 PROMPT = "Transcribe this image to Typst notation. Output only the raw Typst, without explanation. No LaTeX, only Typst." ··· 329 337 return Image.fromarray(out[MY: MY + H, MX: MX + W]) 330 338 331 339 332 - def _augment(img: Image.Image, region_jitter: bool = True, typst: str = "") -> Image.Image: 340 + def _augment(img: Image.Image, region_jitter: bool = True, typst: str = "", 341 + allow_invert: bool = False) -> Image.Image: 333 342 """ 334 343 Augmentation for synthetic-to-real-notes robustness. 335 344 ··· 338 347 339 348 region_jitter: when True, applies _region_jitter with AUG_P_JITTER probability. 340 349 Pass False to run the page-level pipeline without block jitter. 350 + allow_invert: when True, randomly inverts the image for dark-mode augmentation. 351 + Only safe for handwriting splits (no color-only-distinct content). 
341 352 """ 353 + if allow_invert and random.random() < 0.5: 354 + img = ImageOps.invert(img) 342 355 bg = _bg_color(img) 343 356 344 357 angle = random.uniform(-AUG_ANGLE_DEG, AUG_ANGLE_DEG) ··· 457 470 r = self.records[idx] 458 471 img = Image.open(r["image_path"]).convert("RGB") 459 472 if self.do_augment: 460 - img = _augment(img, typst=r.get("typst", "")) 473 + img = _augment(img, typst=r.get("typst", ""), 474 + allow_invert=r.get("split", "") in _HANDWRITING_SPLITS) 461 475 return self.format_fn(r, img) 462 476 463 477