this repo has no description
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Track regenerated typeset splits with dark mode

+66 -34
+2 -2
data/typeset_mixed_test.dvc
··· 1 1 outs: 2 - - md5: 24e917ef40fa0c4f323f11a7997f45b5.dir 3 - size: 60886374 2 + - md5: a0dfa361d8fec376845cdf7ee9727b7a.dir 3 + size: 59523396 4 4 nfiles: 501 5 5 hash: md5 6 6 path: typeset_mixed_test
+2 -2
data/typeset_mixed_train.dvc
··· 1 1 outs: 2 - - md5: 1251d930791048937e518c6bb931a531.dir 3 - size: 2404966441 2 + - md5: fb6fedb3763860680cc334906e2f38b4.dir 3 + size: 2434231075 4 4 nfiles: 20001 5 5 hash: md5 6 6 path: typeset_mixed_train
+2 -2
data/typeset_mixed_val.dvc
··· 1 1 outs: 2 - - md5: 31b5d8765478c3f4d0512aec7a9f2a50.dir 3 - size: 60279129 2 + - md5: 8005f35c2af54519d08b6a5d6b485f44.dir 3 + size: 59104539 4 4 nfiles: 501 5 5 hash: md5 6 6 path: typeset_mixed_val
+2 -2
data/typeset_prose_test.dvc
··· 1 1 outs: 2 - - md5: 2fbe1559f3565093f631321c78ad6e7d.dir 3 - size: 55356034 2 + - md5: 153507a2b98e3059ab4e43d9e5fb24b2.dir 3 + size: 54081499 4 4 nfiles: 501 5 5 hash: md5 6 6 path: typeset_prose_test
+3 -3
data/typeset_prose_train.dvc
··· 1 1 outs: 2 - - md5: 7df77d9559943538754fa8fcec1d39cc.dir 3 - size: 643225173 4 - nfiles: 6001 2 + - md5: 17a437739b6d317f5a992cc754f2fde2.dir 3 + size: 1079329830 4 + nfiles: 10001 5 5 hash: md5 6 6 path: typeset_prose_train
+2 -2
data/typeset_prose_val.dvc
··· 1 1 outs: 2 - - md5: 3ee69f81a366cae891f81d2e84106219.dir 3 - size: 53767934 2 + - md5: 001f6ca0abfcb61a62bdf842c5e48406.dir 3 + size: 54159095 4 4 nfiles: 501 5 5 hash: md5 6 6 path: typeset_prose_val
+2 -2
data/typeset_uniform_test.dvc
··· 1 1 outs: 2 - - md5: 65b5718d0ccbc7bac93ba5f9bdabb88e.dir 3 - size: 33164561 2 + - md5: e345b759d0dc60764d77113eaf6d3e1a.dir 3 + size: 32757475 4 4 nfiles: 501 5 5 hash: md5 6 6 path: typeset_uniform_test
+2 -2
data/typeset_uniform_train.dvc
··· 1 1 outs: 2 - - md5: 926406071435ac5af54990b674166766.dir 3 - size: 644650286 2 + - md5: 7a4a6bd5396343cf11887e11abba17d0.dir 3 + size: 667750358 4 4 nfiles: 10001 5 5 hash: md5 6 6 path: typeset_uniform_train
+2 -2
data/typeset_uniform_val.dvc
··· 1 1 outs: 2 - - md5: 85df052205572241341ed2390adc3486.dir 3 - size: 32904580 2 + - md5: 4260a1b3f9064e1d77699f5ba7854c98.dir 3 + size: 34456319 4 4 nfiles: 501 5 5 hash: md5 6 6 path: typeset_uniform_val
+1 -1
src/augment_vis.py
··· 31 31 description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter 32 32 ) 33 33 parser.add_argument("--split", default="typeset_mixed_train") 34 - parser.add_argument("--n", type=int, default=12) 34 + parser.add_argument("--n", type=int, default=20) 35 35 parser.add_argument("--seed", type=int, default=42) 36 36 parser.add_argument("--out", default="/tmp/augment_vis") 37 37 parser.add_argument("--images", nargs="+", metavar="IMG",
+46 -14
src/generate_typeset.py
··· 61 61 # "blackboard-style"; smaller sizes give tighter notes appearance. 62 62 _FONT_SIZES = [10, 10, 11, 11, 11, 12, 12, 13, 14] 63 63 64 - # Ink colours: mostly black, occasional dark blue / dark grey 64 + # Light-mode ink: mostly black, occasional coloured ink 65 65 _INK_COLOURS = [ 66 - "#000000", "#000000", "#000000", "#000000", # 4/7 pure black 66 + "#000000", "#000000", "#000000", "#000000", # 4/10 pure black 67 67 "#1a1a1a", # near-black 68 68 "#0d0d4d", # dark navy (pen ink) 69 69 "#2b2b2b", # dark grey 70 + "#1a4d1a", # dark green 71 + "#5c0000", # dark red / maroon 72 + "#2d0060", # dark purple 70 73 ] 71 74 75 + # Light-mode page fills: mostly white, occasional warm/cool tints 76 + _PAGE_FILLS_LIGHT = [ 77 + "#ffffff", "#ffffff", "#ffffff", "#ffffff", # 4/7 pure white 78 + "#fffef0", # warm cream 79 + "#fdf6e3", # solarized-light cream 80 + "#f0f4ff", # very light blue 81 + ] 82 + 83 + # Dark-mode variants: light ink on dark background 84 + _INK_COLOURS_DARK = [ 85 + "#ffffff", "#ffffff", "#ffffff", # pure white 86 + "#f0f0f0", # near-white 87 + "#e8e8ff", # blue-white (chalk feel) 88 + "#d0ffd0", # light green (green chalkboard) 89 + ] 90 + 91 + _PAGE_FILLS_DARK = [ 92 + "#1a1a1a", "#1a1a1a", "#1a1a1a", # near-black 93 + "#1e1e1e", # editor dark 94 + "#0d1117", # deep black 95 + "#2d2d2d", # dark grey 96 + ] 97 + 98 + DARK_MODE_PROB = 0.50 99 + 72 100 73 101 def _available_fonts(font_dir: Path) -> list[str]: 74 102 """Return Typst family names whose TTF files exist in font_dir.""" ··· 90 118 # Math equations continue using Typst's built-in math fonts (realistic). 91 119 _TEMPLATE_HW = ( 92 120 "#set page(width: {width}, height: auto, " 93 - "margin: (x: 12pt, y: 12pt), fill: white)\n" 121 + "margin: (x: 12pt, y: 12pt), fill: rgb(\"{fill}\"))\n" 94 122 '#set text(font: ("{font}", "New Computer Modern"), ' 95 123 "size: {size}pt, fill: rgb(\"{ink}\"), fallback: true)\n" 96 124 "#set list(spacing: 1.2em)\n" ··· 102 130 # wrapped in #text(font: ...) -- handled in _apply_mixed_fonts(). 103 131 _TEMPLATE_MIX = ( 104 132 "#set page(width: {width}, height: auto, " 105 - "margin: (x: 12pt, y: 12pt), fill: white)\n" 133 + "margin: (x: 12pt, y: 12pt), fill: rgb(\"{fill}\"))\n" 134 + "#set text(fill: rgb(\"{ink}\"))\n" 106 135 "#set list(spacing: 1.0em)\n" 107 136 "#set enum(spacing: 1.0em)\n" 108 137 "{body}\n" ··· 153 182 font_dir: Path, 154 183 size: int, 155 184 ink: str, 185 + fill: str, 156 186 rng: random.Random, 157 187 ) -> tuple[bool, str]: 158 188 """ ··· 163 193 """ 164 194 if mode == "uniform" and font is not None: 165 195 src = _TEMPLATE_HW.format( 166 - width=page_width, font=font, size=size, ink=ink, body=body 196 + width=page_width, font=font, size=size, ink=ink, fill=fill, body=body 167 197 ) 168 198 else: 169 199 # font=None (default) or mix mode: use plain template 170 - src = _TEMPLATE_MIX.format(width=page_width, body=body) 200 + src = _TEMPLATE_MIX.format(width=page_width, ink=ink, fill=fill, body=body) 171 201 172 202 with tempfile.NamedTemporaryFile(suffix=".typ", mode="w", delete=False) as f: 173 203 f.write(src) ··· 257 287 seen.add(body) 258 288 font = _pick_font(rng, available) 259 289 size = rng.choice(_FONT_SIZES) 260 - ink = rng.choice(_INK_COLOURS) 261 - candidates.append((body, page_width, font, size, ink)) 290 + dark = rng.random() < DARK_MODE_PROB 291 + ink = rng.choice(_INK_COLOURS_DARK if dark else _INK_COLOURS) 292 + fill = rng.choice(_PAGE_FILLS_DARK if dark else _PAGE_FILLS_LIGHT) 293 + candidates.append((body, page_width, font, size, ink, fill)) 262 294 pbar.update(1) 263 295 264 296 print(f" {attempts:,} attempts ({attempts / len(candidates):.1f}x overhead)") ··· 271 303 shown_failures = 0 272 304 273 305 def _task( 274 - clean_body: str, page_width: str, font: str, size: int, ink: str 306 + clean_body: str, page_width: str, font: str, size: int, ink: str, fill: str 275 307 ) -> tuple[str, str, bool, str]: 276 308 # prose renders as a uniform-font document (one font, whole page) 277 309 if args.mode in ("uniform", "prose"): ··· 281 313 clean_body, rng, font, hw_prob=args.hw_prob, size=size, ink=ink 282 314 ) 283 315 284 - # Hash over render body + font + size + ink + mode for uniqueness 285 - h_key = f"{args.mode}:{font}:{size}:{ink}:{page_width}:{render_body}" 316 + # Hash over render body + font + size + ink + fill + mode for uniqueness 317 + h_key = f"{args.mode}:{font}:{size}:{ink}:{fill}:{page_width}:{render_body}" 286 318 h = hashlib.sha1(h_key.encode()).hexdigest()[:16] 287 319 out_path = img_dir / f"{h}.png" 288 320 ··· 290 322 ok, err = _render( 291 323 render_body, out_path, page_width, 292 324 mode=render_mode, font=font, font_dir=font_dir, size=size, ink=ink, 293 - rng=rng, 325 + fill=fill, rng=rng, 294 326 ) 295 327 # Manifest always records the CLEAN body (no font directives). 296 328 return clean_body, f"images/{h}.png", ok, err, font 297 329 298 330 with ThreadPoolExecutor(max_workers=args.jobs) as pool: 299 331 futs = { 300 - pool.submit(_task, body, pw, font, size, ink): body 301 - for body, pw, font, size, ink in candidates 332 + pool.submit(_task, body, pw, font, size, ink, fill): body 333 + for body, pw, font, size, ink, fill in candidates 302 334 } 303 335 with tqdm(total=len(candidates), unit="img") as pbar: 304 336 for fut in as_completed(futs):