this repo has no description
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add replace_frac

+121
+1
pyproject.toml
··· 34 34 review = "src.review_app:main" 35 35 apply-edits = "src.apply_edits:main" 36 36 search-labels = "src.search_labels:main" 37 + replace-frac = "src.replace_frac:main" 37 38 eff-mer-evaluate = "src.eff_mer.infer:main" 38 39 39 40 [build-system]
+120
src/replace_frac.py
"""
Replace frac(A, B) -> (A) / (B) in all manifest labels.

Uses a stack-based parser to handle nested parentheses correctly.

Usage:
    uv run replace-frac [--split NAME] [--dry-run]
"""

import argparse
import json
import shutil
from typing import Optional

from .data import DATA_ROOT, TRAIN_SPLITS, VAL_SPLITS, TEST_SPLITS

# Literal text that opens a fraction call in a label.
_FRAC_OPEN = "frac("


def _all_splits():
    """Return TRAIN_SPLITS, VAL_SPLITS and TEST_SPLITS concatenated in that order."""
    return TRAIN_SPLITS + VAL_SPLITS + TEST_SPLITS


def _match_frac_call(s: str, idx: int) -> Optional[tuple[int, int]]:
    """Locate the argument separator and closing paren of the call at ``s[idx]``.

    ``s[idx:]`` must start with ``frac(``.  Parentheses are tracked with a
    depth counter so commas and parens inside nested calls are ignored.

    Returns:
        ``(comma, close)`` -- the index of the first comma at the call's own
        nesting level and the index of its matching ``)`` -- or ``None`` when
        the call is malformed (no top-level comma, or no matching ``)``).
    """
    depth = 1
    comma = None
    for i in range(idx + len(_FRAC_OPEN), len(s)):
        c = s[i]
        if c == "(":
            depth += 1
        elif c == ")":
            depth -= 1
            if depth == 0:
                return None if comma is None else (comma, i)
        elif c == "," and depth == 1 and comma is None:
            # Only the first top-level comma separates numerator/denominator;
            # anything after it stays part of the denominator text.
            comma = i
    return None  # ran off the end of the string: unbalanced parentheses


def replace_frac_once(s: str) -> tuple[str, bool]:
    """Replace the first well-formed ``frac(A, B)`` in ``s`` with ``(A) / (B)``.

    Occurrences that cannot be rewritten are skipped rather than aborting the
    search, so a malformed call early in the string no longer hides valid
    calls after it.  An occurrence is skipped when:

    * it is the tail of a longer identifier (``dfrac(`` must not turn into
      ``d(...) / (...)``), or
    * it has no top-level comma or no matching closing parenthesis.

    Returns:
        ``(result, changed)``; ``result`` is ``s`` itself when nothing changed.
    """
    search_from = 0
    while True:
        idx = s.find(_FRAC_OPEN, search_from)
        if idx == -1:
            return s, False
        search_from = idx + len(_FRAC_OPEN)

        if idx > 0 and s[idx - 1].isalpha():
            continue  # suffix of another name, not a frac call

        span = _match_frac_call(s, idx)
        if span is None:
            continue  # malformed -- leave it alone and keep looking

        comma, close = span
        a = s[idx + len(_FRAC_OPEN):comma].strip()
        b = s[comma + 1:close].strip()
        return f"{s[:idx]}({a}) / ({b}){s[close + 1:]}", True


def replace_frac_all(s: str) -> tuple[str, int]:
    """Rewrite every well-formed ``frac(A, B)`` in ``s``, including nested ones.

    Each pass rewrites one call; a nested call ends up inside the emitted
    ``(A)`` / ``(B)`` text and is picked up by a later pass.

    Returns:
        ``(result, count)`` where ``count`` is the number of calls rewritten.
    """
    count = 0
    while True:
        s, changed = replace_frac_once(s)
        if not changed:
            return s, count
        count += 1


def _rewrite_lines(lines: list[str]) -> tuple[list[str], list[tuple[str, str]]]:
    """Apply the frac rewrite to the ``typst`` field of each JSONL line.

    Blank lines and records whose label does not change are passed through
    verbatim, so only records that were actually edited get re-serialised.

    Returns:
        ``(new_lines, changes)`` where ``changes`` holds one
        ``(old_label, new_label)`` pair per modified record, in file order.
    """
    new_lines = []
    changes = []
    for line in lines:
        if not line.strip():
            new_lines.append(line)
            continue
        rec = json.loads(line)
        orig = rec.get("typst", "")
        new_t, n = replace_frac_all(orig)
        if not n:
            new_lines.append(line)
            continue
        changes.append((orig, new_t))
        new_lines.append(json.dumps({**rec, "typst": new_t}))
    return new_lines, changes


def main() -> None:
    """CLI entry point: rewrite ``frac`` calls in each split's ``manifest.jsonl``.

    With ``--dry-run`` the before/after labels are printed and nothing is
    written.  Otherwise each modified manifest is first copied to
    ``manifest.jsonl.bak`` and then overwritten.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("--split", default=None, help="Restrict to one split")
    parser.add_argument("--dry-run", action="store_true",
                        help="Print changes without writing")
    args = parser.parse_args()

    splits = [args.split] if args.split else _all_splits()
    grand_total = 0

    for split_name in splits:
        manifest = DATA_ROOT / split_name / "manifest.jsonl"
        if not manifest.exists():
            if args.split:
                # An explicitly requested split that is missing is most likely
                # a typo; say so instead of silently reporting zero updates.
                print(f"[{split_name}] no manifest at {manifest} -- skipped")
            continue

        # Explicit UTF-8: the locale default would mis-decode non-ASCII labels
        # on platforms whose preferred encoding is not UTF-8.
        lines = manifest.read_text(encoding="utf-8").splitlines()
        new_lines, changes = _rewrite_lines(lines)

        if args.dry_run:
            for orig, new_t in changes:
                print(f" [{split_name}] {orig!r}")
                print(f" -> {new_t!r}")

        if changes:
            print(f"[{split_name}] {len(changes)} records updated")
            if not args.dry_run:
                # Keep a copy of the untouched manifest next to the new one.
                bak = manifest.with_suffix(".jsonl.bak")
                shutil.copy2(manifest, bak)
                manifest.write_text("\n".join(new_lines) + "\n", encoding="utf-8")

        grand_total += len(changes)

    print(f"\nTotal: {grand_total} records updated")
    if args.dry_run:
        print("(dry run -- no files written)")
    else:
        print("Run dvc add + git commit to record changes.")


if __name__ == "__main__":
    main()