this repo has no description
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at 7ed4e562cd2ae4b2bf80fb17992106f346ea9d38 175 lines 5.6 kB view raw
"""
Replace `d` → `dif` in two Typst differential contexts:

  1. Derivative fractions:
       (d A) / (d B)        →  (dif A) / (dif B)
       (d^2 y) / (d x^2)    →  (dif^2 y) / (dif x^2)
       (d^{n} y) / (d x^n)  →  (dif^{n} y) / (dif x^n)
       d / (d z)            →  dif / (dif z)

  2. Integral differentials (d-token that follows an `integral` keyword):
       integral f(x) d x    →  integral f(x) dif x
       integral v f d^3 v   →  integral v f dif^3 v
       integral f(x) d_q x  →  integral f(x) dif_q x

Usage:
    uv run replace-dif [--split NAME] [--dry-run] [--limit N]
"""

import argparse
import json
import re
import shutil
import sys
from pathlib import Path

from .data import DATA_ROOT, TRAIN_SPLITS, VAL_SPLITS, TEST_SPLITS


def _all_splits():
    """Return the train, validation and test split names, in that order."""
    return TRAIN_SPLITS + VAL_SPLITS + TEST_SPLITS


# Optional superscript/subscript modifier: ^2, _q, ^{n+1}, _{k=0}, …
_MOD = r'([_^](?:\{[^}]*\}|[^\s,()=+*/\\-]+))?'

# Pattern 1a: (d ...) / (d ...) — both sides parenthesised
# Groups: mod1, rest1, mod2, rest2
_DERIV_RE = re.compile(
    r'\(d' + _MOD + r'(\s+[^)]+)\)'
    r'\s*/\s*'
    r'\(d' + _MOD + r'(\s+[^)]+)\)'
)

# Pattern 1b: bare d / (d ...) — operator notation, e.g. d/(d z) or d / (d^2 z)
# Groups: slash_span, mod_denom, rest_denom
_D_OVER_PAREN_D_RE = re.compile(
    r'\bd(\s*/\s*)\(d' + _MOD + r'(\s+[^)]+)\)'
)

# d-token as integral differential: standalone d (with optional modifier)
# followed by whitespace then a variable / open-paren / backslash command.
_D_TOKEN_RE = re.compile(r'\bd' + _MOD + r'(?=\s+[a-zA-Z(\\])')

# integral keyword
_INTEGRAL_RE = re.compile(r'\bintegral\b')


def _replace_deriv(s: str) -> tuple[str, int]:
    """Rewrite derivative fractions in *s* to use `dif`.

    Handles the fully parenthesised form ``(d A) / (d B)`` first, then the
    operator form ``d / (d B)``.

    Returns:
        ``(new_string, n)`` where *n* is the number of fractions rewritten.
    """

    def repl_full(m: re.Match) -> str:
        mod1 = m.group(1) or ''
        rest1 = m.group(2)
        mod2 = m.group(3) or ''
        rest2 = m.group(4)
        # The slash is always emitted as " / " here, whatever the input spacing.
        return f'(dif{mod1}{rest1}) / (dif{mod2}{rest2})'

    def repl_op(m: re.Match) -> str:
        slash = m.group(1)  # e.g. "/" or " / " — kept exactly as written
        mod_denom = m.group(2) or ''
        rest = m.group(3)
        return f'dif{slash}(dif{mod_denom}{rest})'

    # subn reports how many substitutions it made, so no manual counter needed.
    s, n_full = _DERIV_RE.subn(repl_full, s)
    s, n_op = _D_OVER_PAREN_D_RE.subn(repl_op, s)
    return s, n_full + n_op


def _replace_integral_d(s: str) -> tuple[str, int]:
    """Replace d-tokens that appear after an integral keyword.

    Only the leading ``d`` of each token is rewritten; any modifier such as
    ``^3`` or ``_q`` is left in place.

    Returns:
        ``(new_string, n)`` where *n* is the number of d-tokens rewritten.
    """
    first_integral = _INTEGRAL_RE.search(s)
    if first_integral is None:
        return s, 0

    # A d-token qualifies when *some* integral starts before it, which is the
    # same as the earliest integral starting before it.
    threshold = first_integral.start()

    pieces = []
    cursor = 0
    replaced = 0
    for dm in _D_TOKEN_RE.finditer(s):
        if dm.start() <= threshold:
            continue
        pieces.append(s[cursor:dm.start()])
        pieces.append('dif')
        cursor = dm.start() + 1  # skip just the "d"; the modifier follows as-is
        replaced += 1

    if not replaced:
        return s, 0

    pieces.append(s[cursor:])
    return ''.join(pieces), replaced


def replace_dif_all(s: str) -> tuple[str, int]:
    """Apply both replacement passes. Returns (new_string, n_replacements)."""
    s, n1 = _replace_deriv(s)
    s, n2 = _replace_integral_d(s)
    return s, n1 + n2


def _rewrite_manifest_lines(
    lines: list[str],
) -> tuple[list[str], list[tuple[str, str]]]:
    """Apply `replace_dif_all` to the ``typst`` field of each JSONL line.

    Blank lines and records that need no change are passed through
    byte-for-byte, so rewriting a manifest only touches the affected records.

    Returns:
        ``(new_lines, changes)`` where *changes* holds one ``(before, after)``
        pair of ``typst`` strings per modified record, in file order.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    new_lines: list[str] = []
    changes: list[tuple[str, str]] = []

    for line in lines:
        if not line.strip():
            new_lines.append(line)
            continue

        rec = json.loads(line)
        orig = rec.get('typst', '')
        if not isinstance(orig, str):
            # e.g. null: nothing to rewrite, and the regexes need a str.
            new_lines.append(line)
            continue

        new_t, n = replace_dif_all(orig)
        if not n:
            new_lines.append(line)
            continue

        changes.append((orig, new_t))
        rec['typst'] = new_t
        # Keep the line's escaping style: a line that already contains raw
        # non-ASCII characters is written back unescaped; a pure-ASCII line
        # keeps \uXXXX escapes.
        new_lines.append(json.dumps(rec, ensure_ascii=line.isascii()))

    return new_lines, changes


def main() -> None:
    """Command-line entry point: rewrite (or preview) the split manifests.

    For each selected split, reads ``DATA_ROOT/<split>/manifest.jsonl``. In a
    normal run a manifest with changes is first copied to
    ``manifest.jsonl.bak`` and then rewritten; with ``--dry-run`` the changes
    are only printed.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('--split', default=None, help='Restrict to one split')
    parser.add_argument('--dry-run', action='store_true',
                        help='Print changes without writing')
    parser.add_argument('--limit', type=int, default=None, metavar='N',
                        help='In dry-run: stop after N changed records')
    args = parser.parse_args()

    splits = [args.split] if args.split else _all_splits()
    grand_total = 0
    shown = 0

    for split_name in splits:
        manifest = DATA_ROOT / split_name / 'manifest.jsonl'
        if not manifest.exists():
            if args.split:
                # An explicitly requested split that is missing is most likely
                # a typo; say so instead of silently reporting zero records.
                print(f'warning: no manifest found at {manifest}',
                      file=sys.stderr)
            continue

        lines = manifest.read_text(encoding='utf-8').splitlines()
        new_lines, changes = _rewrite_manifest_lines(lines)
        changed = len(changes)

        if args.dry_run:
            for orig, new_t in changes:
                if args.limit is not None and shown >= args.limit:
                    break
                print(f' [{split_name}]')
                print(f' before: {orig!r}')
                print(f' after: {new_t!r}')
                shown += 1

        if changed:
            print(f'[{split_name}] {changed} records would change'
                  if args.dry_run else
                  f'[{split_name}] {changed} records updated')
            if not args.dry_run:
                bak = manifest.with_suffix('.jsonl.bak')
                shutil.copy2(manifest, bak)
                manifest.write_text('\n'.join(new_lines) + '\n',
                                    encoding='utf-8')

        grand_total += changed

        if args.dry_run and args.limit is not None and shown >= args.limit:
            print(f'(stopped after {args.limit} shown)')
            break

    print(f'\nTotal: {grand_total} records')
    if args.dry_run:
        print('(dry run -- no files written)')
    else:
        print('Run dvc add + git commit to record changes.')


if __name__ == '__main__':
    main()