"""
Replace `d` → `dif` in two Typst differential contexts:

1. Derivative fractions:
     (d A) / (d B)          →  (dif A) / (dif B)
     (d^2 y) / (d x^2)      →  (dif^2 y) / (dif x^2)
     (d^{n} y) / (d x^n)    →  (dif^{n} y) / (dif x^n)
     d / (d z)              →  dif / (dif z)

2. Integral differentials (d-token that follows an `integral` keyword):
     integral f(x) d x      →  integral f(x) dif x
     integral v f d^3 v     →  integral v f dif^3 v
     integral f(x) d_q x    →  integral f(x) dif_q x

Usage:
    uv run replace-dif [--split NAME] [--dry-run] [--limit N]
"""

import argparse
import json
import re
import shutil
from pathlib import Path

from .data import DATA_ROOT, TRAIN_SPLITS, VAL_SPLITS, TEST_SPLITS


def _all_splits():
    """Return every known split name: train, then validation, then test."""
    return TRAIN_SPLITS + VAL_SPLITS + TEST_SPLITS


# Optional superscript/subscript modifier: ^2, _q, ^{n+1}, _{k=0}, …
_MOD = r'([_^](?:\{[^}]*\}|[^\s,()=+*/\\-]+))?'

# Pattern 1a: (d ...) / (d ...) — both sides parenthesised
# Groups: mod1, rest1, mod2, rest2
_DERIV_RE = re.compile(
    r'\(d' + _MOD + r'(\s+[^)]+)\)'
    r'\s*/\s*'
    r'\(d' + _MOD + r'(\s+[^)]+)\)'
)

# Pattern 1b: bare d / (d ...) — operator notation, e.g. d/(d z) or d / (d^2 z)
# Groups: slash_span, mod_denom, rest_denom
_D_OVER_PAREN_D_RE = re.compile(
    r'\bd(\s*/\s*)\(d' + _MOD + r'(\s+[^)]+)\)'
)

# d-token as integral differential: standalone d (with optional modifier)
# followed by whitespace then a variable / open-paren / backslash command.
_D_TOKEN_RE = re.compile(r'\bd' + _MOD + r'(?=\s+[a-zA-Z(\\])')

# integral keyword
_INTEGRAL_RE = re.compile(r'\bintegral\b')


def _replace_deriv(s: str) -> tuple[str, int]:
    """Rewrite derivative fractions in *s* to use ``dif``.

    Two shapes are handled, in this order:

    * ``(d… A) / (d… B)``; the output always uses ``" / "`` around the slash.
    * ``d / (d… B)``; the original spacing around the slash is preserved.

    Returns ``(new_string, n_replacements)`` where each rewritten fraction
    counts once.
    """
    # Optional modifier groups that did not participate expand to '' in a
    # replacement template, so no explicit `or ''` handling is needed.
    s, n_full = _DERIV_RE.subn(r'(dif\g<1>\g<2>) / (dif\g<3>\g<4>)', s)
    # Group 1 is the slash span, e.g. "/" or " / ".
    s, n_op = _D_OVER_PAREN_D_RE.subn(r'dif\g<1>(dif\g<2>\g<3>)', s)
    return s, n_full + n_op


def _replace_integral_d(s: str) -> tuple[str, int]:
    """Replace d-tokens that appear after an integral keyword.

    A d-token qualifies when at least one ``integral`` keyword starts before
    it, which is the same as starting after the *first* ``integral`` in *s*.
    Only the leading ``d`` of the token is replaced; any modifier such as
    ``^3`` or ``_q`` is left in place.

    Returns ``(new_string, n_replacements)``.
    """
    first = _INTEGRAL_RE.search(s)
    if first is None:
        return s, 0
    first_integral = first.start()

    targets = [
        dm.start()
        for dm in _D_TOKEN_RE.finditer(s)
        if dm.start() > first_integral
    ]
    if not targets:
        return s, 0

    # Stitch the result from the untouched slices between replaced "d"s.
    pieces = []
    cursor = 0
    for pos in targets:
        pieces.append(s[cursor:pos])
        pieces.append('dif')
        cursor = pos + 1  # skip the single "d" being replaced
    pieces.append(s[cursor:])
    return ''.join(pieces), len(targets)


def replace_dif_all(s: str) -> tuple[str, int]:
    """Apply both replacement passes. Returns (new_string, n_replacements)."""
    s, n1 = _replace_deriv(s)
    s, n2 = _replace_integral_d(s)
    return s, n1 + n2


def main() -> None:
    """Command-line entry point: rewrite the ``typst`` field of manifests.

    For each selected split, reads ``DATA_ROOT/<split>/manifest.jsonl``,
    applies :func:`replace_dif_all` to every record's ``typst`` string and,
    unless ``--dry-run`` is given, writes the manifest back after copying the
    original to ``manifest.jsonl.bak``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--split', default=None, help='Restrict to one split')
    parser.add_argument('--dry-run', action='store_true',
                        help='Print changes without writing')
    parser.add_argument('--limit', type=int, default=None, metavar='N',
                        help='In dry-run: stop after N changed records')
    args = parser.parse_args()

    splits = [args.split] if args.split else _all_splits()
    limited = args.dry_run and args.limit is not None
    grand_total = 0
    shown = 0

    for split_name in splits:
        manifest = DATA_ROOT / split_name / 'manifest.jsonl'
        if not manifest.exists():
            # An explicitly requested split that is missing is most likely a
            # typo; say so instead of silently reporting zero records.
            if args.split:
                print(f'[{split_name}] no manifest found at {manifest}')
            continue

        # Explicit UTF-8: Typst markup is frequently non-ASCII and the
        # platform default encoding is not reliable.
        lines = manifest.read_text(encoding='utf-8').splitlines()
        new_lines = []
        changed = 0

        for line in lines:
            if not line.strip():
                new_lines.append(line)
                continue

            rec = json.loads(line)
            orig = rec.get('typst') if isinstance(rec, dict) else None
            if not isinstance(orig, str):
                # Nothing to rewrite (missing / null / non-string field).
                new_lines.append(line)
                continue

            new_t, n = replace_dif_all(orig)
            if not n:
                # Keep untouched records byte-for-byte to avoid diff churn.
                new_lines.append(line)
                continue

            changed += 1
            if args.dry_run and (args.limit is None or shown < args.limit):
                print(f' [{split_name}]')
                print(f' before: {orig!r}')
                print(f' after: {new_t!r}')
                shown += 1
            rec['typst'] = new_t
            # Follow the source line's escaping style: only \u-escape
            # non-ASCII text if the original line was pure ASCII.
            new_lines.append(json.dumps(rec, ensure_ascii=line.isascii()))

        if changed:
            print(f'[{split_name}] {changed} records would change'
                  if args.dry_run
                  else f'[{split_name}] {changed} records updated')
            if not args.dry_run:
                bak = manifest.with_suffix('.jsonl.bak')
                shutil.copy2(manifest, bak)
                manifest.write_text('\n'.join(new_lines) + '\n',
                                    encoding='utf-8')
            grand_total += changed

        if limited and shown >= args.limit:
            print(f'(stopped after {args.limit} shown)')
            break

    print(f'\nTotal: {grand_total} records')
    if args.dry_run:
        print('(dry run -- no files written)')
    elif grand_total:
        print('Run dvc add + git commit to record changes.')


if __name__ == '__main__':
    main()