this repo has no description
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

WIP: Add generate_mixed dataset for more structure

+543 -35
+1
pyproject.toml
··· 28 28 train-hnm = "src.train_hnm:main" 29 29 export = "src.export:main" 30 30 generate-typeset = "src.generate_typeset:main" 31 + generate-mixed = "src.generate_mixed:main" 31 32 probe = "src.probe:main" 32 33 app = "src.app:main" 33 34 probe-deepseek = "src.probe_deepseek:main"
+4 -3
src/data.py
··· 29 29 "mathwriting_synthetic", 30 30 "mathwriting_symbols", 31 31 "typeset_train", 32 + "typeset_mixed_train", 32 33 ] 33 - VAL_SPLITS = ["mathwriting_val", "typeset_val"] 34 - TEST_SPLITS = ["mathwriting_test", "typeset_test"] 34 + VAL_SPLITS = ["mathwriting_val", "typeset_val", "typeset_mixed_val"] 35 + TEST_SPLITS = ["mathwriting_test", "typeset_test", "typeset_mixed_test"] 35 36 36 - PROMPT = "Transcribe this mathematical expression to Typst math notation." 37 + PROMPT = "Transcribe this image to Typst notation." 37 38 BASE_MODEL = "unsloth/gemma-4-E2B-it" 38 39 39 40
+304
src/generate_mixed.py
"""
Generate mixed text+math synthetic dataset for Typst OCR.

Each sample is either an inline sequence or a list (possibly nested), combining
word-salad text spans and math expressions. Text groups within a sequence are
styled independently with bold/italic.

The manifest typst field stores the content body only (no #set page header),
matching what the model should output.

Usage: uv run generate-mixed
       uv run generate-mixed --count 15000 --out data/typeset_mixed_train
       uv run generate-mixed --count 500 --out data/typeset_mixed_val --seed 100
       uv run generate-mixed --count 500 --out data/typeset_mixed_test --seed 200
"""

import argparse
import hashlib
import json
import random
import subprocess
import tempfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

from tqdm import tqdm

from .generate_typeset import generate_expr


# ── Emoji pools ───────────────────────────────────────────────────────────────

# Tier 1 (annotation + colored circles) weighted 2x via repetition
_EMOJI = [
    "✅", "✅", "❌", "❌", "⚠️", "⚠️", "💡", "💡",
    "🔴", "🔴", "🟠", "🟡", "🟢", "🟢", "🔵", "🔵", "🟣", "⚫", "⚪",
    # Tier 2 (colored squares, misc)
    "🟥", "🟧", "🟨", "🟩", "🟦", "🟪",
    "⭐", "🔥", "🏆",
]

# ── Text pools ────────────────────────────────────────────────────────────────

_STARTERS = [
    "Let", "Assume", "Given", "Therefore", "Consider", "Suppose",
    "Note", "Define", "Recall", "Observe", "Claim", "Proof",
]

_CONNECTORS = [
    "where", "and", "such that", "yields", "since", "thus", "hence",
    "satisfies", "with", "implies", "then", "otherwise", "provided",
]

# Words shorter than four characters are dropped by the trailing filter.
_WORDS = [w for w in [
    # Generic mathematical prose
    "arbitrary", "bounded", "closed", "complete", "condition", "constant",
    "continuous", "defined", "dense", "derivation", "distinct", "distance",
    "equal", "equation", "even", "exact", "exists", "finite", "fixed",
    "following", "function", "general", "graph", "group", "holds", "index",
    "infinite", "integer", "large", "limit", "linear", "local", "measure",
    "model", "natural", "negative", "normal", "notation", "number", "object",
    "open", "order", "point", "positive", "prime", "proof", "proper",
    "property", "rational", "real", "regular", "result", "rule", "sequence",
    "series", "simple", "single", "smooth", "solution", "space", "stable",
    "standard", "statement", "system", "term", "theorem", "total", "type",
    "unique", "valid", "value", "variable", "vector",
    "above", "across", "again", "along", "also", "always", "among",
    "another", "apply", "basic", "because", "before", "being", "below",
    "between", "both", "case", "certain", "clear", "could", "each", "every",
    "example", "find", "first", "form", "from", "further", "have", "here",
    "high", "however", "into", "just", "keep", "known", "last", "less",
    "like", "long", "made", "make", "many", "mean", "more", "most", "must",
    "need", "next", "none", "only", "other", "over", "part", "path",
    "place", "same", "show", "side", "some", "such", "take", "than",
    "that", "their", "them", "there", "these", "they", "this", "those",
    "through", "time", "together", "under", "until", "upon", "used",
    "using", "very", "when", "which", "while", "with", "within", "work",
    "would", "write", "written", "zero",
    # Algebra / topology / analysis
    "injective", "surjective", "bijective",
    "homeomorphism", "diffeomorphism", "isomorphism", "homomorphism",
    "endomorphism", "automorphism", "morphism",
    "compact", "connected", "contractible", "simply",
    "convergent", "divergent", "uniformly",
    "orthogonal", "orthonormal", "symmetric", "antisymmetric", "skew",
    "commutative", "associative", "distributive", "idempotent",
    "invertible", "singular", "nonsingular", "diagonal", "triangular",
    "eigenvalue", "eigenvector", "spectrum", "trace", "determinant",
    "manifold", "topology", "metric", "seminorm", "subspace", "quotient",
    "algebra", "field", "ring", "module", "ideal", "lattice", "poset",
    "functor", "category", "adjoint", "covariant", "contravariant",
    "kernel", "image", "cokernel", "exact", "short", "long",
    # Physics / EE
    "Lagrangian", "Hamiltonian", "potential", "kinetic", "momentum",
    "angular", "torque", "energy", "entropy", "pressure", "volume",
    "frequency", "amplitude", "wavelength", "period", "phase", "flux",
    "electric", "magnetic", "charge", "current", "voltage", "impedance",
    "capacitance", "inductance", "resistance", "conductance",
    # CS / type theory
    "decidable", "undecidable", "computable", "recursive", "primitive",
    "polymorphic", "monomorphic", "parametric", "covariant",
    "deterministic", "nondeterministic", "probabilistic",
    "inductive", "coinductive", "initial", "terminal",
    "reduction", "substitution", "context", "judgment", "inference",
] if len(w) >= 4]


# ── Markup helpers ────────────────────────────────────────────────────────────

def _styled(text: str, bold: bool, italic: bool) -> str:
    """Wrap *text* in Typst emphasis markers: ``*`` for bold, ``_`` for italic."""
    if bold and italic:
        return f"*_{text}_*"
    elif bold:
        return f"*{text}*"
    elif italic:
        return f"_{text}_"
    return text


def _text_group(
    rng: random.Random,
    first_in_doc: bool = False,
    after_math: bool = False,
) -> str:
    """Return a short run of 1-3 words, independently styled bold/italic.

    When ``first_in_doc`` is set, the first word is drawn from ``_STARTERS``
    with probability 0.55; otherwise, when ``after_math`` is set, it is drawn
    from ``_CONNECTORS`` with probability 0.5. Remaining slots are filled by
    sampling ``_WORDS`` without replacement.
    """
    n = rng.choices([1, 2, 3], weights=[3, 5, 2])[0]
    words: list[str] = []

    if first_in_doc and rng.random() < 0.55:
        words.append(rng.choice(_STARTERS))
        n -= 1
    elif after_math and rng.random() < 0.5:
        words.append(rng.choice(_CONNECTORS))
        n -= 1

    words += rng.sample(_WORDS, k=min(max(n, 0), len(_WORDS)))
    text = " ".join(words).strip()
    if not text:
        text = rng.choice(_WORDS)

    # Bold and italic are decided by two independent draws (bold first).
    return _styled(text, bold=rng.random() < 0.3, italic=rng.random() < 0.3)


# ── Sequence grammar ──────────────────────────────────────────────────────────

def _inline_seq(
    rng: random.Random,
    n_tokens: int,
    require_math: bool = True,
) -> str:
    """Return ``n_tokens`` space-joined tokens: ``$math$``, emoji, or text.

    With ``require_math`` set, the final slot is forced to be math if no
    earlier slot produced any, so the sequence always contains a formula.
    """
    parts: list[str] = []
    has_math = False
    prev_math = False

    for i in range(n_tokens):
        force_math = require_math and not has_math and i == n_tokens - 1
        r = rng.random()
        if force_math or r < 0.45:
            parts.append(f"${generate_expr(rng)}$")
            has_math = True
            prev_math = True
        elif r < 0.47:  # ~2% emoji per slot
            parts.append(rng.choice(_EMOJI))
            prev_math = False
        else:
            parts.append(_text_group(rng, first_in_doc=(i == 0), after_math=prev_math))
            prev_math = False

    return " ".join(parts)


def _list_body(rng: random.Random, depth: int = 0) -> list[str]:
    """Return the lines of a bullet (``-``) or numbered (``+``) list.

    Top-level lists have 2-5 items and nested lists 1-3. Only top-level items
    (``depth < 1``) may carry a nested child list, with probability 0.25.
    """
    marker = rng.choice(["- ", "+ "])
    n_items = rng.randint(2, 5) if depth == 0 else rng.randint(1, 3)
    indent = " " * depth
    lines: list[str] = []

    for _ in range(n_items):
        has_children = depth < 1 and rng.random() < 0.25
        if has_children:
            # Parent items may be empty; the child list carries the content.
            n_tok = rng.randint(0, 2)
        else:
            n_tok = rng.choices([1, 2, 3, 4], weights=[4, 5, 3, 1])[0]

        require_math = rng.random() < 0.7
        prefix = _inline_seq(rng, n_tok, require_math=require_math) if n_tok > 0 else ""
        # 5% chance of emoji bullet prefix (natural in annotated lists)
        emoji_prefix = rng.choice(_EMOJI) + " " if rng.random() < 0.05 else ""
        lines.append(f"{indent}{marker}{emoji_prefix}{prefix}".rstrip())

        if has_children:
            lines.extend(_list_body(rng, depth=depth + 1))

    return lines


def generate_body(rng: random.Random) -> str:
    """Return one sample body: a list (30% of draws) or an inline sequence."""
    if rng.random() < 0.3:
        return "\n".join(_list_body(rng))
    n = rng.choices([2, 3, 4, 5, 6, 7], weights=[4, 8, 7, 5, 3, 1])[0]
    return _inline_seq(rng, n, require_math=True)


# ── Rendering ─────────────────────────────────────────────────────────────────

_CONTENT_TEMPLATE = (
    "#set page(width: auto, height: auto, "
    "margin: (x: 10pt, y: 8pt), fill: white)\n"
    "{body}\n"
)


def render_content(body: str, out_path: Path) -> tuple[bool, str]:
    """Compile *body* to a PNG at *out_path* using the ``typst`` CLI.

    The body is wrapped in ``_CONTENT_TEMPLATE`` and written to a temporary
    ``.typ`` file, which is always removed afterwards.

    Returns:
        ``(success, stderr)``. ``stderr`` is ``"timeout"`` when compilation
        exceeds 15 seconds and ``"typst not found"`` when the executable is
        missing.
    """
    src = _CONTENT_TEMPLATE.format(body=body)
    # Bodies contain emoji, so write UTF-8 explicitly rather than relying on
    # the locale's default encoding.
    f = tempfile.NamedTemporaryFile(
        suffix=".typ", mode="w", encoding="utf-8", delete=False
    )
    typ_path = Path(f.name)
    try:
        # The write sits inside the try so a failed write still cleans up.
        with f:
            f.write(src)
        result = subprocess.run(
            ["typst", "compile", "--format", "png", "--ppi", "150",
             str(typ_path), str(out_path)],
            capture_output=True,
            timeout=15,
        )
        return result.returncode == 0, result.stderr.decode(errors="replace")
    except subprocess.TimeoutExpired:
        return False, "timeout"
    except FileNotFoundError:
        return False, "typst not found"
    finally:
        typ_path.unlink(missing_ok=True)


# ── Main ──────────────────────────────────────────────────────────────────────

def main() -> None:
    """CLI entry point: generate unique bodies, render them, write a manifest.

    Images are written to ``<out>/images/<sha1-prefix>.png`` and the manifest
    to ``<out>/manifest.jsonl``, one ``{"image", "typst"}`` record per
    successfully rendered body, in generation order.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--count", type=int, default=15_000)
    parser.add_argument("--out", default="data/typeset_mixed_train")
    parser.add_argument("--jobs", type=int, default=4)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--show-failures", type=int, default=0, metavar="N",
                        help="Print body + typst stderr for first N render failures")
    args = parser.parse_args()

    out = Path(args.out)
    img_dir = out / "images"
    img_dir.mkdir(parents=True, exist_ok=True)

    rng = random.Random(args.seed)

    print(f"Generating {args.count:,} unique bodies ...")
    seen: set[str] = set()
    candidates: list[str] = []
    attempts = 0

    with tqdm(total=args.count, unit="body") as pbar:
        while len(candidates) < args.count:
            attempts += 1
            body = generate_body(rng)
            if body in seen:
                continue
            seen.add(body)
            candidates.append(body)
            pbar.update(1)

    # max(..., 1) avoids ZeroDivisionError when --count is 0 or negative.
    print(f"  {attempts:,} attempts ({attempts / max(len(candidates), 1):.1f}x overhead)")

    print(f"Rendering {len(candidates):,} images with {args.jobs} workers ...")
    # Results are keyed by candidate index so the manifest order follows the
    # seeded generation order, not the order in which renders finish.
    by_index: dict[int, dict] = {}
    failures: int = 0

    shown_failures = 0

    def _task(body: str) -> tuple[str, str, bool, str]:
        h = hashlib.sha1(body.encode()).hexdigest()[:16]
        out_path = img_dir / f"{h}.png"
        ok, err = render_content(body, out_path)
        return body, f"images/{h}.png", ok, err

    with ThreadPoolExecutor(max_workers=args.jobs) as pool:
        futs = {pool.submit(_task, b): i for i, b in enumerate(candidates)}
        with tqdm(total=len(candidates), unit="img") as pbar:
            for fut in as_completed(futs):
                body, rel_path, ok, err = fut.result()
                if ok:
                    by_index[futs[fut]] = {"image": rel_path, "typst": body}
                else:
                    failures += 1
                    if shown_failures < args.show_failures:
                        tqdm.write(f"\n--- failure ---\nbody: {body!r}\n{err.strip()}")
                        shown_failures += 1
                pbar.update(1)

    records: list[dict] = [by_index[i] for i in sorted(by_index)]

    manifest = out / "manifest.jsonl"
    with manifest.open("w", encoding="utf-8") as f:
        for r in records:
            f.write(json.dumps(r) + "\n")

    print(f"Wrote {len(records):,} records to {manifest} ({failures} render failures)")


if __name__ == "__main__":
    main()
+228 -31
src/generate_typeset.py
··· 26 26 27 27 # ── Grammar ─────────────────────────────────────────────────────────────────── 28 28 29 - _VARS = ["x", "y", "z", "n", "a", "b", "t", "k", "m", "i", "j", "r", "s", "u", "v"] 29 + _VARS = ["x", "y", "z", "n", "a", "b", "t", "k", "m", "i", "j", "r", "s", 30 + "u", "v", "f", "g", "h", "p", "q"] 30 31 _GREEK = ["alpha", "beta", "theta", "lambda", "mu", "sigma", "pi", "phi", 31 - "omega", "epsilon", "delta", "gamma", "rho", "nu", "eta", "xi", "zeta"] 32 - _NUMS = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "100", "p", "q"] 32 + "omega", "epsilon", "delta", "gamma", "rho", "nu", "eta", "xi", "zeta", 33 + "Phi", "Psi", "Lambda", "Sigma", "Omega", "Delta", "Gamma", "Theta", 34 + # Alternate glyph variants (LaTeX \var* forms) 35 + "phi.alt", "epsilon.alt", "theta.alt"] 36 + _NUMS = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "100"] 33 37 _FUNCS = ["sin", "cos", "tan", "ln", "log", "exp", "arcsin", "arccos", 34 - "arctan", "sinh", "cosh", "tanh"] 35 - _RELS = ["=", "!=", "<=", ">=", "<", ">", "approx", "equiv"] 38 + "arctan", "sinh", "cosh", "tanh", "Re", "Im", "det", "tr", "ker", 39 + "arg"] 40 + _RELS = ["=", "!=", "<=", ">=", "<", ">", "approx", "equiv", "tilde.eq", 41 + "prec", "succ"] 36 42 _OPS = ["+", "-", "dot", "times"] 37 43 _TO = ["0", "oo", "1", "-oo", "infinity"] 38 44 45 + # Blackboard bold (set/space names) 46 + _BLACKBOARD = ["RR", "ZZ", "NN", "QQ", "CC", "FF"] 47 + 48 + # Vector/decorator functions applied to atoms 49 + # Accent functions usable as f(atom) in Typst math 50 + _DECORATORS = ["arrow", "hat", "tilde", "overline", "bold", "underline"] 51 + # Time-derivative accents via accent(base, sym) -- separate pool 52 + _DOT_ACCENTS = ["dot", "dot.double", "dot.triple"] 53 + 54 + # Set-theoretic binary operators 55 + _SET_OPS = ["union", "inter", "without", "times"] 56 + 57 + # Physical vector fields (used with arrow() in Maxwell branches) 58 + _FIELDS = ["E", "B", "J", "D", "H", "F", "A", "v"] 59 + 60 + # Physical 
constants commonly appearing as subscripted atoms 61 + _PHYS_ATOMS = ["epsilon_0", "mu_0", "sigma", "rho", "J"] 62 + 63 + # Custom math operators (op("name") gives proper operatorname spacing/limits) 64 + _CUSTOM_OPS = ["rank", "span", "coker", "im", 65 + "supp", "diam", "vol", "codim", "sgn", "sign"] 66 + 67 + # Math spacing atoms (skipping med -- visually indistinct from thin at normal zoom) 68 + _SPACES = ["thin", "thick", "quad", "wide"] 69 + 70 + # Calligraphic / script letters (Typst: cal(P), cal(H), etc.) 71 + # Used for powerset cal(P), sigma-algebras cal(F), Hilbert spaces cal(H), Lagrangians cal(L), etc. 72 + _CALLIGRAPHIC = ["A", "B", "C", "F", "G", "H", "L", "M", "N", "P", "R", "S", "T"] 73 + 39 74 40 75 def _atom(rng: random.Random) -> str: 41 76 r = rng.random() 42 - if r < 0.45: 77 + if r < 0.36: 43 78 return rng.choice(_VARS) 44 - elif r < 0.65: 79 + elif r < 0.54: 45 80 return rng.choice(_GREEK) 46 - else: 81 + elif r < 0.65: 47 82 return rng.choice(_NUMS) 83 + elif r < 0.78: 84 + return rng.choice(_BLACKBOARD) 85 + elif r < 0.90: 86 + return rng.choice(_PHYS_ATOMS) 87 + else: 88 + return f"cal({rng.choice(_CALLIGRAPHIC)})" 48 89 49 90 50 91 def _expr(rng: random.Random, depth: int = 0, max_depth: int = 3) -> str: ··· 53 94 54 95 c = rng.random() 55 96 56 - if c < 0.10: 57 - return f"frac({_expr(rng, depth+1)}, {_expr(rng, depth+1)})" 58 - elif c < 0.17: 97 + # ── Core algebra / calculus ─────────────────────────────────────────────── 98 + 99 + if c < 0.09: 100 + return f"({_expr(rng, depth+1)}) / ({_expr(rng, depth+1)})" 101 + elif c < 0.15: 59 102 return f"{_atom(rng)}^({_expr(rng, depth+1)})" 60 - elif c < 0.23: 103 + elif c < 0.20: 61 104 return f"{_atom(rng)}^{_atom(rng)}" 62 - elif c < 0.28: 105 + elif c < 0.25: 63 106 a, s, p = _atom(rng), _atom(rng), _atom(rng) 64 107 return f"{a}_{s}^{p}" 65 - elif c < 0.34: 108 + elif c < 0.30: 66 109 v, lo, hi = _atom(rng), _atom(rng), _atom(rng) 67 110 return f"sum_({v} = {lo})^{hi} {_expr(rng, 
depth+1)}" 68 - elif c < 0.39: 111 + elif c < 0.35: 69 112 lo, hi = _atom(rng), _atom(rng) 70 - dv = rng.choice(["x", "y", "t"]) 113 + dv = rng.choice(["x", "y", "t", "r"]) 71 114 return f"integral_{lo}^{hi} {_expr(rng, depth+1)} dif {dv}" 72 - elif c < 0.44: 115 + elif c < 0.39: 73 116 return f"{rng.choice(_FUNCS)}({_expr(rng, depth+1)})" 74 - elif c < 0.48: 117 + elif c < 0.43: 75 118 return f"sqrt({_expr(rng, depth+1)})" 76 - elif c < 0.52: 119 + elif c < 0.46: 77 120 n = rng.randint(2, 5) 78 121 return f"root({n}, {_expr(rng, depth+1)})" 79 - elif c < 0.56: 122 + elif c < 0.50: 80 123 v = rng.choice(_VARS) 81 124 to = rng.choice(_TO) 82 125 return f"lim_({v} -> {to}) {_expr(rng, depth+1)}" 83 - elif c < 0.60: 126 + elif c < 0.53: 84 127 return f"binom({_atom(rng)}, {_atom(rng)})" 85 - elif c < 0.67: 128 + elif c < 0.59: 86 129 op = rng.choice(_OPS) 87 130 return f"({_expr(rng, depth+1)} {op} {_expr(rng, depth+1)})" 88 - elif c < 0.72: 131 + elif c < 0.63: 89 132 rel = rng.choice(_RELS) 90 133 return f"{_expr(rng, depth+1)} {rel} {_expr(rng, depth+1)}" 91 - elif c < 0.76: 134 + elif c < 0.67: 92 135 v, lo, hi = _atom(rng), _atom(rng), _atom(rng) 93 136 return f"product_({v} = {lo})^{hi} {_expr(rng, depth+1)}" 94 - elif c < 0.80: 95 - e = [_atom(rng) for _ in range(4)] 96 - return f"mat({e[0]}, {e[1]}; {e[2]}, {e[3]})" 97 - elif c < 0.84: 137 + elif c < 0.70: 138 + if rng.random() < 0.35: 139 + e = [_atom(rng) for _ in range(9)] 140 + return (f"mat({e[0]}, {e[1]}, {e[2]}; " 141 + f"{e[3]}, {e[4]}, {e[5]}; " 142 + f"{e[6]}, {e[7]}, {e[8]})") 143 + else: 144 + e = [_atom(rng) for _ in range(4)] 145 + return f"mat({e[0]}, {e[1]}; {e[2]}, {e[3]})" 146 + elif c < 0.73: 98 147 f_var = rng.choice(_VARS) 99 148 x_var = rng.choice(_VARS) 100 149 return f"(dif {f_var}) / (dif {x_var})" 101 - elif c < 0.88: 150 + elif c < 0.76: 102 151 f_var = rng.choice(_VARS) 103 152 x_var = rng.choice(_VARS) 104 - return f"(diff {f_var}) / (diff {x_var})" 105 - elif c < 0.92: 153 + 
return f"(partial {f_var}) / (partial {x_var})" 154 + elif c < 0.79: 106 155 return f"norm({_expr(rng, depth+1)})" 156 + 157 + # ── Logic / sequent calculus ────────────────────────────────────────────── 158 + 159 + elif c < 0.82: 160 + r2 = rng.random() 161 + A = _expr(rng, depth+1) 162 + if r2 < 0.25: 163 + # Sequent: Gamma ⊢ A 164 + ctx = rng.choice(["Gamma", "Delta", "Sigma"]) 165 + return f"({ctx} tack.r {A})" 166 + elif r2 < 0.50: 167 + # Propositional connective 168 + op = rng.choice(["and", "or", "=>", "<=>", "xor"]) 169 + return f"({A} {op} {_expr(rng, depth+1)})" 170 + elif r2 < 0.65: 171 + # Negation 172 + return f"not {A}" 173 + elif r2 < 0.78: 174 + # Semantic entailment / models 175 + return f"({A} models {_expr(rng, depth+1)})" 176 + elif r2 < 0.88: 177 + # Proposition with top/bot constant 178 + tb = rng.choice(["top", "bot"]) 179 + op = rng.choice(["=>", "<=>", "and", "or"]) 180 + return f"({A} {op} {tb})" 181 + else: 182 + # Type judgment: a : tau 183 + return f"({_atom(rng)} : {A})" 184 + 185 + # ── Decorators / vector notation ────────────────────────────────────────── 186 + 187 + elif c < 0.84: 188 + dec = rng.choice(_DECORATORS) 189 + return f"{dec}({_atom(rng)})" 190 + elif c < 0.86: 191 + # dot / dot.double / dot.triple accents (time derivatives) 192 + return f"accent({rng.choice(_VARS)}, {rng.choice(_DOT_ACCENTS)})" 193 + 194 + # ── Sets and types ──────────────────────────────────────────────────────── 195 + 196 + elif c < 0.87: 197 + # membership: x in RR, k in ZZ, ... 
198 + return f"{_atom(rng)} in {rng.choice(_BLACKBOARD)}" 199 + elif c < 0.89: 200 + op = rng.choice(_SET_OPS) 201 + return f"({_expr(rng, depth+1)} {op} {_expr(rng, depth+1)})" 202 + 203 + # ── Function signatures / arrows ────────────────────────────────────────── 204 + 205 + elif c < 0.91: 206 + # f: RR^n -> RR or f: A -> B or f: A times B -> C 207 + fname = rng.choice(_VARS) 208 + dom = rng.choice(_BLACKBOARD) 209 + cod = rng.choice(_BLACKBOARD) 210 + r2 = rng.random() 211 + if r2 < 0.35: 212 + exp = rng.choice(["n", "m", "k", "2", "3"]) 213 + return f"({fname}: {dom}^{exp} -> {cod})" 214 + elif r2 < 0.60: 215 + return f"({fname}: {dom} -> {cod})" 216 + elif r2 < 0.80: 217 + d2 = rng.choice(_BLACKBOARD) 218 + return f"({fname}: {dom} times {d2} -> {cod})" 219 + else: 220 + d2 = rng.choice(_BLACKBOARD) 221 + d3 = rng.choice(_BLACKBOARD) 222 + return f"({fname}: {dom} times {d2} times {d3} -> {cod})" 223 + 224 + # ── Logic / type theory ─────────────────────────────────────────────────── 225 + 226 + elif c < 0.93: 227 + q = rng.choice(["forall", "exists"]) 228 + v = rng.choice(_VARS) 229 + return f"({q} {v}, {_expr(rng, depth+1)})" 230 + elif c < 0.94: 231 + v = rng.choice(_VARS) 232 + return f"lambda {v}. {_expr(rng, depth+1)}" 233 + 234 + # ── Vector calculus / Maxwell ───────────────────────────────────────────── 235 + 236 + elif c < 0.96: 237 + # grad, div, curl, Laplacian -- nabla and text forms 238 + r2 = rng.random() 239 + vec = f"arrow({rng.choice(_FIELDS)})" 240 + if r2 < 0.20: 241 + return f"nabla dot {vec}" # divergence (nabla form) 242 + elif r2 < 0.40: 243 + return f"nabla times {vec}" # curl (nabla form) 244 + elif r2 < 0.55: 245 + return f"nabla^2 {_atom(rng)}" # Laplacian 246 + elif r2 < 0.68: 247 + return f"nabla {_expr(rng, depth+1)}" # gradient 248 + elif r2 < 0.78: 249 + return f'op("div") {vec}' # divergence (text form) 250 + elif r2 < 0.88: 251 + return f'op("curl") {vec}' # curl (text form) 252 + else: 253 + # mixed: e.g. 
(nabla dot E) = rho / epsilon_0 254 + lhs = f"nabla dot {vec}" 255 + rhs = _expr(rng, depth+1) 256 + return f"{lhs} = {rhs}" 257 + 258 + elif c < 0.97: 259 + # Maxwell integral form: closed line / surface / volume integrals 260 + r2 = rng.random() 261 + vec = rng.choice(_FIELDS) 262 + if r2 < 0.50: 263 + curve = rng.choice(["C", "partial S", "partial V"]) 264 + dl = rng.choice(["l", "s"]) 265 + return f"integral.cont_{curve} arrow({vec}) dot dif arrow({dl})" 266 + elif r2 < 0.85: 267 + surf = rng.choice(["S", "partial V", "partial Omega"]) 268 + return f"integral.surf_{surf} arrow({vec}) dot dif arrow(A)" 269 + else: 270 + vol = rng.choice(["V", "Omega"]) 271 + return f"integral.vol_{vol} {_expr(rng, depth+1)} dif V" 272 + 273 + # ── Miscellaneous useful constructs ─────────────────────────────────────── 274 + 275 + elif c < 0.975: 276 + fn = rng.choice(["abs", "floor", "ceil"]) 277 + return f"{fn}({_expr(rng, depth+1)})" 278 + elif c < 0.985: 279 + # piecewise / cases 280 + e1 = _expr(rng, depth+1) 281 + e2 = _expr(rng, depth+1) 282 + cond = f"{_atom(rng)} > {_atom(rng)}" 283 + return f'cases({e1} "if" {cond}, {e2} "otherwise")' 284 + elif c < 0.992: 285 + # indexed sequence with ellipsis: (a_1, dots.c, a_n) 286 + # parens required -- bare commas break multi-arg contexts (sqrt, norm, etc.) 287 + v = rng.choice(_VARS) 288 + n = rng.choice(["n", "m", "N", "k"]) 289 + return f"({v}_1, dots.c, {v}_{n})" 290 + elif c < 0.994: 291 + # chevron bracket pair (inner product, type constructors) 292 + # outer parens prevent comma from being parsed as extra function arg 293 + return f"(chevron.l {_expr(rng, depth+1)}, {_expr(rng, depth+1)} chevron.r)" 294 + elif c < 0.997: 295 + # custom math operators: op("rank")(x) gives proper operatorname spacing 296 + return f'op("{rng.choice(_CUSTOM_OPS)}")({_expr(rng, depth+1)})' 297 + elif c < 0.9985: 298 + # calligraphic applied to expr: cal(P)(X) powerset, cal(H) Hilbert space, etc. 
299 + return f"cal({rng.choice(['P', 'F', 'H', 'L', 'B'])})({_expr(rng, depth+1)})" 300 + elif c < 0.999: 301 + # explicit math spacing between two sub-expressions 302 + sp = rng.choice(_SPACES) 303 + return f"{_atom(rng)} {sp} {_atom(rng)}" 107 304 else: 108 305 return _atom(rng) 109 306
+6 -1
src/static/review.html
··· 330 330 // Image panel 331 331 const imgDiv = document.createElement('div'); 332 332 imgDiv.className = 'card-img'; 333 + const imgLink = document.createElement('a'); 334 + imgLink.href = `/image/${item.id}`; 335 + imgLink.target = '_blank'; 336 + imgLink.title = 'Open full resolution'; 333 337 const img = document.createElement('img'); 334 338 img.src = `/image/${item.id}`; 335 339 img.alt = ''; 336 340 img.loading = 'lazy'; 337 - imgDiv.appendChild(img); 341 + imgLink.appendChild(img); 342 + imgDiv.appendChild(imgLink); 338 343 card.appendChild(imgDiv); 339 344 340 345 // Body