this repo has no description
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add crohme_gen_syntactic split; fix val stratification; reduce batch size; fix checkpoint resume; drop transformers pin

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

+113 -61
-1
pyproject.toml
··· 19 19 "python-multipart>=0.0.9", 20 20 "einops>=0.8.2", 21 21 "easydict>=1.13", 22 - "transformers==4.47.1", 23 22 "tensorboard>=2.20.0", 24 23 "setuptools==81.0.0", 25 24 ]
+19 -11
src/train.py
··· 6 6 7 7 import argparse 8 8 import random 9 + from pathlib import Path 9 10 10 11 from unsloth import FastVisionModel 11 12 from unsloth.trainer import UnslothVisionDataCollator 12 13 from trl import SFTTrainer, SFTConfig 13 14 14 - from .data import (BASE_MODEL, TRAIN_SPLITS, VAL_SPLITS, load_records, make_dataset) 15 + from .data import (BASE_MODEL, TRAIN_SPLITS, load_records, make_dataset) 15 16 16 17 # Per-split record caps. Synthetic-heavy splits are capped to prevent them 17 18 # from dominating the training mix. Real and document-structure splits are 18 19 # uncapped. Override any cap with --cap SPLIT N. 19 20 _DEFAULT_CAPS = { 20 - "mathwriting_synthetic": 20_000, 21 - "crohme_gen_2019": 15_000, 22 - "mathwriting_train": 10_000, 21 + "mathwriting_synthetic": 20_000, 22 + "crohme_gen_2019": 15_000, 23 + "crohme_gen_syntactic": 15_000, 24 + "mathwriting_train": 10_000, 23 25 } 24 26 25 27 ··· 67 69 train_records.extend(recs) 68 70 rng.shuffle(train_records) 69 71 72 + # Stratified val: 250 mathwriting + 250 typeset + all mixed (500) = 1000 70 73 val_rng = random.Random(42) 71 - val_records = load_records(VAL_SPLITS, dedupe=False) 72 - if len(val_records) > 1000: 73 - val_records = val_rng.sample(val_records, 1000) 74 + mw_val = load_records(["mathwriting_val"], dedupe=False) 75 + ts_val = load_records(["typeset_val"], dedupe=False) 76 + mixed_val = load_records(["typeset_mixed_val"], dedupe=False) 77 + val_records = (val_rng.sample(mw_val, min(250, len(mw_val))) 78 + + val_rng.sample(ts_val, min(250, len(ts_val))) 79 + + mixed_val) 80 + val_rng.shuffle(val_records) 74 81 75 82 print(f"Train: {len(train_records):,} Val: {len(val_records):,}") 76 83 for split in TRAIN_SPLITS: ··· 89 96 train_dataset=train_ds, 90 97 eval_dataset=val_ds, 91 98 args=SFTConfig( 92 - per_device_train_batch_size=4, 93 - per_device_eval_batch_size=4, 94 - gradient_accumulation_steps=4, # effective batch 16 99 + per_device_train_batch_size=2, 100 + per_device_eval_batch_size=2, 101 + gradient_accumulation_steps=8, # effective batch 16 95 102 num_train_epochs=args.epochs, 96 103 learning_rate=args.lr, 97 104 warmup_steps=500, ··· 112 119 ), 113 120 ) 114 121 115 - trainer.train(resume_from_checkpoint=True) 122 + has_checkpoint = any(Path(out_dir).glob("checkpoint-*")) if Path(out_dir).exists() else False 123 + trainer.train(resume_from_checkpoint=has_checkpoint) 116 124 model.save_pretrained(f"{out_dir}/final") 117 125 processor.save_pretrained(f"{out_dir}/final") 118 126 print(f"Saved to {out_dir}/final")
+94 -49
uv.lock
··· 865 865 866 866 [[package]] 867 867 name = "huggingface-hub" 868 - version = "0.36.2" 868 + version = "1.10.1" 869 869 source = { registry = "https://pypi.org/simple" } 870 870 dependencies = [ 871 871 { name = "filelock" }, 872 872 { name = "fsspec" }, 873 - { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, 873 + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, 874 + { name = "httpx" }, 874 875 { name = "packaging" }, 875 876 { name = "pyyaml" }, 876 - { name = "requests" }, 877 877 { name = "tqdm" }, 878 + { name = "typer" }, 878 879 { name = "typing-extensions" }, 879 880 ] 880 - sdist = { url = "https://files.pythonhosted.org/packages/7c/b7/8cb61d2eece5fb05a83271da168186721c450eb74e3c31f7ef3169fa475b/huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a", size = 649782, upload-time = "2026-02-06T09:24:13.098Z" } 881 + sdist = { url = "https://files.pythonhosted.org/packages/e4/28/baf5d745559503ce8d28cf5bc9551f5ac59158eafd7b6a6afff0bcdb0f50/huggingface_hub-1.10.1.tar.gz", hash = "sha256:696c53cf9c2ac9befbfb5dd41d05392a031c69fc6930d1ed9671debd405b6fff", size = 758094, upload-time = "2026-04-09T15:01:18.928Z" } 881 882 wheels = [ 882 - { url = "https://files.pythonhosted.org/packages/a8/af/48ac8483240de756d2438c380746e7130d1c6f75802ef22f3c6d49982787/huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270", size = 566395, upload-time = "2026-02-06T09:24:11.133Z" }, 883 - ] 884 - 885 - [package.optional-dependencies] 886 - hf-xet = [ 887 - { name = "hf-xet" }, 883 + { url = "https://files.pythonhosted.org/packages/83/8c/c7a33f3efaa8d6a5bc40e012e5ecc2d72c2e6124550ca9085fe0ceed9993/huggingface_hub-1.10.1-py3-none-any.whl", hash = "sha256:6b981107a62fbe68c74374418983399c632e35786dcd14642a9f2972633c8b5a", size = 642630, upload-time = "2026-04-09T15:01:17.35Z" }, 888 884 ] 889 885 890 886 [[package]] ··· 1315 1311 { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741, upload-time = "2024-01-28T18:52:29.395Z" }, 1316 1312 { url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628, upload-time = "2024-01-28T18:52:30.853Z" }, 1317 1313 { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload-time = "2024-01-28T18:52:31.981Z" }, 1314 + ] 1315 + 1316 + [[package]] 1317 + name = "nest-asyncio" 1318 + version = "1.6.0" 1319 + source = { registry = "https://pypi.org/simple" } 1320 + sdist = { url = "https://files.pythonhosted.org/packages/83/f8/51569ac65d696c8ecbee95938f89d4abf00f47d58d48f6fbabfe8f0baefe/nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", size = 7418, upload-time = "2024-01-21T14:25:19.227Z" } 1321 + wheels = [ 1322 + { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" }, 1318 1323 ] 1319 1324 1320 1325 [[package]] ··· 2212 2217 ] 2213 2218 2214 2219 [[package]] 2220 + name = "shellingham" 2221 + version = "1.5.4" 2222 + source = { registry = "https://pypi.org/simple" } 2223 + sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } 2224 + wheels = [ 2225 + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, 2226 + ] 2227 + 2228 + [[package]] 2215 2229 name = "six" 2216 2230 version = "1.17.0" 2217 2231 source = { registry = "https://pypi.org/simple" } ··· 2277 2291 2278 2292 [[package]] 2279 2293 name = "tokenizers" 2280 - version = "0.21.4" 2294 + version = "0.22.2" 2281 2295 source = { registry = "https://pypi.org/simple" } 2282 2296 dependencies = [ 2283 2297 { name = "huggingface-hub" }, 2284 2298 ] 2285 - sdist = { url = "https://files.pythonhosted.org/packages/c2/2f/402986d0823f8d7ca139d969af2917fefaa9b947d1fb32f6168c509f2492/tokenizers-0.21.4.tar.gz", hash = "sha256:fa23f85fbc9a02ec5c6978da172cdcbac23498c3ca9f3645c5c68740ac007880", size = 351253, upload-time = "2025-07-28T15:48:54.325Z" } 2299 + sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" } 2286 2300 wheels = [ 2287 - { url = "https://files.pythonhosted.org/packages/98/c6/fdb6f72bf6454f52eb4a2510be7fb0f614e541a2554d6210e370d85efff4/tokenizers-0.21.4-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:2ccc10a7c3bcefe0f242867dc914fc1226ee44321eb618cfe3019b5df3400133", size = 2863987, upload-time = "2025-07-28T15:48:44.877Z" }, 2288 - { url = "https://files.pythonhosted.org/packages/8d/a6/28975479e35ddc751dc1ddc97b9b69bf7fcf074db31548aab37f8116674c/tokenizers-0.21.4-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:5e2f601a8e0cd5be5cc7506b20a79112370b9b3e9cb5f13f68ab11acd6ca7d60", size = 2732457, upload-time = "2025-07-28T15:48:43.265Z" }, 2289 - { url = "https://files.pythonhosted.org/packages/aa/8f/24f39d7b5c726b7b0be95dca04f344df278a3fe3a4deb15a975d194cbb32/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b376f5a1aee67b4d29032ee85511bbd1b99007ec735f7f35c8a2eb104eade5", size = 3012624, upload-time = "2025-07-28T13:22:43.895Z" }, 2290 - { url = "https://files.pythonhosted.org/packages/58/47/26358925717687a58cb74d7a508de96649544fad5778f0cd9827398dc499/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2107ad649e2cda4488d41dfd031469e9da3fcbfd6183e74e4958fa729ffbf9c6", size = 2939681, upload-time = "2025-07-28T13:22:47.499Z" }, 2291 - { url = "https://files.pythonhosted.org/packages/99/6f/cc300fea5db2ab5ddc2c8aea5757a27b89c84469899710c3aeddc1d39801/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c73012da95afafdf235ba80047699df4384fdc481527448a078ffd00e45a7d9", size = 3247445, upload-time = "2025-07-28T15:48:39.711Z" }, 2292 - { url = "https://files.pythonhosted.org/packages/be/bf/98cb4b9c3c4afd8be89cfa6423704337dc20b73eb4180397a6e0d456c334/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f23186c40395fc390d27f519679a58023f368a0aad234af145e0f39ad1212732", size = 3428014, upload-time = "2025-07-28T13:22:49.569Z" }, 2293 - { url = "https://files.pythonhosted.org/packages/75/c7/96c1cc780e6ca7f01a57c13235dd05b7bc1c0f3588512ebe9d1331b5f5ae/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc88bb34e23a54cc42713d6d98af5f1bf79c07653d24fe984d2d695ba2c922a2", size = 3193197, upload-time = "2025-07-28T13:22:51.471Z" }, 2294 - { url = "https://files.pythonhosted.org/packages/f2/90/273b6c7ec78af547694eddeea9e05de771278bd20476525ab930cecaf7d8/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51b7eabb104f46c1c50b486520555715457ae833d5aee9ff6ae853d1130506ff", size = 3115426, upload-time = "2025-07-28T15:48:41.439Z" }, 2295 - { url = "https://files.pythonhosted.org/packages/91/43/c640d5a07e95f1cf9d2c92501f20a25f179ac53a4f71e1489a3dcfcc67ee/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:714b05b2e1af1288bd1bc56ce496c4cebb64a20d158ee802887757791191e6e2", size = 9089127, upload-time = "2025-07-28T15:48:46.472Z" }, 2296 - { url = "https://files.pythonhosted.org/packages/44/a1/dd23edd6271d4dca788e5200a807b49ec3e6987815cd9d0a07ad9c96c7c2/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:1340ff877ceedfa937544b7d79f5b7becf33a4cfb58f89b3b49927004ef66f78", size = 9055243, upload-time = "2025-07-28T15:48:48.539Z" }, 2297 - { url = "https://files.pythonhosted.org/packages/21/2b/b410d6e9021c4b7ddb57248304dc817c4d4970b73b6ee343674914701197/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:3c1f4317576e465ac9ef0d165b247825a2a4078bcd01cba6b54b867bdf9fdd8b", size = 9298237, upload-time = "2025-07-28T15:48:50.443Z" }, 2298 - { url = "https://files.pythonhosted.org/packages/b7/0a/42348c995c67e2e6e5c89ffb9cfd68507cbaeb84ff39c49ee6e0a6dd0fd2/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:c212aa4e45ec0bb5274b16b6f31dd3f1c41944025c2358faaa5782c754e84c24", size = 9461980, upload-time = "2025-07-28T15:48:52.325Z" }, 2299 - { url = "https://files.pythonhosted.org/packages/3d/d3/dacccd834404cd71b5c334882f3ba40331ad2120e69ded32cf5fda9a7436/tokenizers-0.21.4-cp39-abi3-win32.whl", hash = "sha256:6c42a930bc5f4c47f4ea775c91de47d27910881902b0f20e4990ebe045a415d0", size = 2329871, upload-time = "2025-07-28T15:48:56.841Z" }, 2300 - { url = "https://files.pythonhosted.org/packages/41/f2/fd673d979185f5dcbac4be7d09461cbb99751554ffb6718d0013af8604cb/tokenizers-0.21.4-cp39-abi3-win_amd64.whl", hash = "sha256:475d807a5c3eb72c59ad9b5fcdb254f6e17f53dfcbb9903233b0dfa9c943b597", size = 2507568, upload-time = "2025-07-28T15:48:55.456Z" }, 2301 + { url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" }, 2302 + { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = "2026-01-05T10:41:00.276Z" }, 2303 + { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = "2026-01-05T10:40:32.165Z" }, 2304 + { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" }, 2305 + { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" }, 2306 + { url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" }, 2307 + { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = "2026-01-05T10:40:51.139Z" }, 2308 + { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload-time = "2026-01-05T10:40:58.331Z" }, 2309 + { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = "2026-01-05T10:41:04.053Z" }, 2310 + { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" }, 2311 + { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" }, 2312 + { url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" }, 2313 + { url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload-time = "2026-01-05T10:45:20.593Z" }, 2314 + { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload-time = "2026-01-05T10:45:18.411Z" }, 2315 + { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" }, 2301 2316 ] 2302 2317 2303 2318 [[package]] ··· 2361 2376 ] 2362 2377 2363 2378 [[package]] 2379 + name = "torchao" 2380 + version = "0.17.0" 2381 + source = { registry = "https://pypi.org/simple" } 2382 + wheels = [ 2383 + { url = "https://files.pythonhosted.org/packages/32/fe/a4036a8e80fa800c92dbcbf75f541cd4c106248b6b579db6dab1800f616a/torchao-0.17.0-cp310-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:87a418ce0ec064a821ceab83c921b501acef0ce9a6ccd1be358fcd16c3ae8c58", size = 3206172, upload-time = "2026-03-30T22:25:52.974Z" }, 2384 + { url = "https://files.pythonhosted.org/packages/c9/37/ef37ca885265e5f79a168616767dd416a3cea1cc3b28bb6b503ce4a5b652/torchao-0.17.0-py3-none-any.whl", hash = "sha256:02eba449036715b9ae784fbaa1a6f97994bb7b0421ce92d1d5d1c08e5bd6d349", size = 1200680, upload-time = "2026-03-30T22:25:54.457Z" }, 2385 + ] 2386 + 2387 + [[package]] 2364 2388 name = "torchvision" 2365 2389 version = "0.25.0" 2366 2390 source = { registry = "https://pypi.org/simple" } ··· 2406 2430 2407 2431 [[package]] 2408 2432 name = "transformers" 2409 - version = "4.47.1" 2433 + version = "5.5.0" 2410 2434 source = { registry = "https://pypi.org/simple" } 2411 2435 dependencies = [ 2412 - { name = "filelock" }, 2413 2436 { name = "huggingface-hub" }, 2414 2437 { name = "numpy" }, 2415 2438 { name = "packaging" }, 2416 2439 { name = "pyyaml" }, 2417 2440 { name = "regex" }, 2418 - { name = "requests" }, 2419 2441 { name = "safetensors" }, 2420 2442 { name = "tokenizers" }, 2421 2443 { name = "tqdm" }, 2444 + { name = "typer" }, 2422 2445 ] 2423 - sdist = { url = "https://files.pythonhosted.org/packages/15/1a/936aeb4f88112f670b604f5748034568dbc2b9bbb457a8d4518b1a15510a/transformers-4.47.1.tar.gz", hash = "sha256:6c29c05a5f595e278481166539202bf8641281536df1c42357ee58a45d0a564a", size = 8707421, upload-time = "2024-12-17T15:42:41.653Z" } 2446 + sdist = { url = "https://files.pythonhosted.org/packages/ff/9d/fb46e729b461985f41a5740167688b924a4019141e5c164bea77548d3d9e/transformers-5.5.0.tar.gz", hash = "sha256:c8db656cf51c600cd8c75f06b20ef85c72e8b8ff9abc880c5d3e8bc70e0ddcbd", size = 8237745, upload-time = "2026-04-02T16:13:08.113Z" } 2424 2447 wheels = [ 2425 - { url = "https://files.pythonhosted.org/packages/f2/3a/8bdab26e09c5a242182b7ba9152e216d5ab4ae2d78c4298eb4872549cd35/transformers-4.47.1-py3-none-any.whl", hash = "sha256:d2f5d19bb6283cd66c893ec7e6d931d6370bbf1cc93633326ff1f41a40046c9c", size = 10133598, upload-time = "2024-12-17T15:42:35.1Z" }, 2448 + { url = "https://files.pythonhosted.org/packages/e7/28/35f7411ff80a3640c1f4fc907dcbb6a65061ebb82f66950e38bfc9f7f740/transformers-5.5.0-py3-none-any.whl", hash = "sha256:821a9ff0961abbb29eb1eb686d78df1c85929fdf213a3fe49dc6bd94f9efa944", size = 10245591, upload-time = "2026-04-02T16:13:03.462Z" }, 2426 2449 ] 2427 2450 2428 2451 [[package]] ··· 2454 2477 2455 2478 [[package]] 2456 2479 name = "trl" 2457 - version = "0.15.2" 2480 + version = "0.24.0" 2458 2481 source = { registry = "https://pypi.org/simple" } 2459 2482 dependencies = [ 2460 2483 { name = "accelerate" }, 2461 2484 { name = "datasets" }, 2462 - { name = "rich" }, 2463 2485 { name = "transformers" }, 2464 2486 ] 2465 - sdist = { url = "https://files.pythonhosted.org/packages/95/fe/ae0d782c48eef56d0ec125ebd05998539ede7cbf0e307a48f9323998b9e7/trl-0.15.2.tar.gz", hash = "sha256:0f82190a058a0a194dbcfae1fe9548b68a0a05b2f4d1824f8db1ae7d949cdd47", size = 333962, upload-time = "2025-02-25T22:34:34.595Z" } 2487 + sdist = { url = "https://files.pythonhosted.org/packages/e8/2e/30ece0055eee5763126e2d52f6e04aec294bcae34b46d9ca16c53c4b5852/trl-0.24.0.tar.gz", hash = "sha256:eee495223725d3da0596be2607581969db89ba0f7c00b075802addc31e61eac9", size = 368447, upload-time = "2025-10-16T00:10:37.65Z" } 2466 2488 wheels = [ 2467 - { url = "https://files.pythonhosted.org/packages/7b/29/25378447c48359843de0e4ce1995d367210601c3b437ddf1c779b6393d74/trl-0.15.2-py3-none-any.whl", hash = "sha256:bf2b88e3cf5da08cd533dc03273d977965bd5d86c5878f76285fba45d9cb9634", size = 318931, upload-time = "2025-02-25T22:34:32.768Z" }, 2489 + { url = "https://files.pythonhosted.org/packages/87/5f/c647fedde9d59ae35ee189cc49e419da5ac1d9ad9933cb69401a7eac4705/trl-0.24.0-py3-none-any.whl", hash = "sha256:a9145b7d4a4a33778de117bda48530f0cf5b2ac25acc07db80ad04836f490dfc", size = 423143, upload-time = "2025-10-16T00:10:35.809Z" }, 2468 2490 ] 2469 2491 2470 2492 [[package]] ··· 2477 2499 sdist = { url = "https://files.pythonhosted.org/packages/2b/e8/66e25efcc18542d58706ce4e50415710593721aae26e794ab1dec34fb66f/typeguard-4.5.1.tar.gz", hash = "sha256:f6f8ecbbc819c9bc749983cc67c02391e16a9b43b8b27f15dc70ed7c4a007274", size = 80121, upload-time = "2026-02-19T16:09:03.392Z" } 2478 2500 wheels = [ 2479 2501 { url = "https://files.pythonhosted.org/packages/91/88/b55b3117287a8540b76dbdd87733808d4d01c8067a3b339408c250bb3600/typeguard-4.5.1-py3-none-any.whl", hash = "sha256:44d2bf329d49a244110a090b55f5f91aa82d9a9834ebfd30bcc73651e4a8cc40", size = 36745, upload-time = "2026-02-19T16:09:01.6Z" }, 2502 + ] 2503 + 2504 + [[package]] 2505 + name = "typer" 2506 + version = "0.24.1" 2507 + source = { registry = "https://pypi.org/simple" } 2508 + dependencies = [ 2509 + { name = "annotated-doc" }, 2510 + { name = "click" }, 2511 + { name = "rich" }, 2512 + { name = "shellingham" }, 2513 + ] 2514 + sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } 2515 + wheels = [ 2516 + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, 2480 2517 ] 2481 2518 2482 2519 [[package]] ··· 2520 2557 { name = "torch" }, 2521 2558 { name = "torchvision" }, 2522 2559 { name = "tqdm" }, 2523 - { name = "transformers" }, 2524 2560 { name = "trl" }, 2525 2561 { name = "unsloth", extra = ["colab-new"] }, 2526 2562 { name = "uvicorn", extra = ["standard"] }, ··· 2543 2579 { name = "torch", specifier = ">=2.3" }, 2544 2580 { name = "torchvision", specifier = ">=0.18" }, 2545 2581 { name = "tqdm" }, 2546 - { name = "transformers", specifier = "==4.47.1" }, 2547 2582 { name = "trl", specifier = ">=0.15" }, 2548 2583 { name = "unsloth", extras = ["colab-new"] }, 2549 2584 { name = "uvicorn", extras = ["standard"], specifier = ">=0.29" }, ··· 2577 2612 2578 2613 [[package]] 2579 2614 name = "unsloth" 2580 - version = "2025.4.7" 2615 + version = "2026.4.4" 2581 2616 source = { registry = "https://pypi.org/simple" } 2582 2617 dependencies = [ 2583 2618 { name = "accelerate" }, ··· 2586 2621 { name = "diffusers" }, 2587 2622 { name = "hf-transfer" }, 2588 2623 { name = "huggingface-hub" }, 2624 + { name = "nest-asyncio" }, 2589 2625 { name = "numpy" }, 2590 2626 { name = "packaging" }, 2591 2627 { name = "peft" }, 2592 2628 { name = "protobuf" }, 2593 2629 { name = "psutil" }, 2630 + { name = "pydantic" }, 2631 + { name = "pyyaml" }, 2594 2632 { name = "sentencepiece" }, 2595 2633 { name = "torch" }, 2596 2634 { name = "torchvision" }, 2597 2635 { name = "tqdm" }, 2598 2636 { name = "transformers" }, 2599 - { name = "triton", marker = "sys_platform == 'linux'" }, 2600 - { name = "triton-windows", marker = "sys_platform == 'win32'" }, 2637 + { name = "triton", marker = "'linux' in sys_platform" }, 2638 + { name = "triton-windows", marker = "(platform_machine == 'AMD64' and sys_platform == 'win32') or (platform_machine == 'x86_64' and sys_platform == 'win32')" }, 2601 2639 { name = "trl" }, 2640 + { name = "typer" }, 2602 2641 { name = "tyro" }, 2603 2642 { name = "unsloth-zoo" }, 2604 2643 { name = "wheel" }, 2605 - { name = "xformers" }, 2644 + { name = "xformers", marker = "(platform_machine == 'AMD64' and 'linux' in sys_platform) or (platform_machine == 'x86_64' and 'linux' in sys_platform) or (platform_machine == 'AMD64' and sys_platform == 'win32') or (platform_machine == 'x86_64' and sys_platform == 'win32')" }, 2606 2645 ] 2607 - sdist = { url = "https://files.pythonhosted.org/packages/36/a2/cdfb59c6d8d2dce821cda3af9196e895c8d5d529bffc642fbfa638112616/unsloth-2025.4.7.tar.gz", hash = "sha256:479679b3713486af0d74b376eeefe64bbd2c6b3e76eb932d625b10e1d76ad306", size = 191726, upload-time = "2025-05-02T21:07:55.157Z" } 2646 + sdist = { url = "https://files.pythonhosted.org/packages/12/54/52822f5ecec70d8ce4733164df302eee55b4aba1cb3860e47c58d809dcd9/unsloth-2026.4.4.tar.gz", hash = "sha256:5d5c0c1d5bd48886927e34c2d9b59ee610c882e917ba1362fe6209a7c3eea97d", size = 66783124, upload-time = "2026-04-06T16:40:57.746Z" } 2608 2647 wheels = [ 2609 - { url = "https://files.pythonhosted.org/packages/6f/c7/1e762675727596707d327108c8931e94580f9752296da8e7e61fa4f671c2/unsloth-2025.4.7-py3-none-any.whl", hash = "sha256:d601113b6ab4de7c420a3163ce99ab9b149d01281693dfaa1c5e7036b59bb262", size = 218481, upload-time = "2025-05-02T21:07:51.285Z" }, 2648 + { url = "https://files.pythonhosted.org/packages/a8/80/f5246519a22f9b962f8d6364e980c83ca666b03aba693ef4223c70597000/unsloth-2026.4.4-py3-none-any.whl", hash = "sha256:da9d80a6f1a50b53ee641fbff254716945921c6ffc05d3bdbb676865a04cd27f", size = 62599611, upload-time = "2026-04-06T16:40:53.546Z" }, 2610 2649 ] 2611 2650 2612 2651 [package.optional-dependencies] ··· 2615 2654 { name = "datasets" }, 2616 2655 { name = "hf-transfer" }, 2617 2656 { name = "huggingface-hub" }, 2657 + { name = "nest-asyncio" }, 2618 2658 { name = "numpy" }, 2619 2659 { name = "packaging" }, 2620 2660 { name = "protobuf" }, 2621 2661 { name = "psutil" }, 2662 + { name = "pydantic" }, 2663 + { name = "pyyaml" }, 2622 2664 { name = "sentencepiece" }, 2623 2665 { name = "tqdm" }, 2624 2666 { name = "transformers" }, 2667 + { name = "typer" }, 2625 2668 { name = "tyro" }, 2626 2669 { name = "unsloth-zoo" }, 2627 2670 { name = "wheel" }, ··· 2629 2672 2630 2673 [[package]] 2631 2674 name = "unsloth-zoo" 2632 - version = "2025.4.4" 2675 + version = "2026.4.6" 2633 2676 source = { registry = "https://pypi.org/simple" } 2634 2677 dependencies = [ 2635 2678 { name = "accelerate" }, 2636 2679 { name = "cut-cross-entropy" }, 2637 2680 { name = "datasets" }, 2681 + { name = "filelock" }, 2638 2682 { name = "hf-transfer" }, 2639 - { name = "huggingface-hub", extra = ["hf-xet"] }, 2683 + { name = "huggingface-hub" }, 2640 2684 { name = "msgspec" }, 2641 2685 { name = "numpy" }, 2642 2686 { name = "packaging" }, ··· 2647 2691 { name = "regex" }, 2648 2692 { name = "sentencepiece" }, 2649 2693 { name = "torch" }, 2694 + { name = "torchao" }, 2650 2695 { name = "tqdm" }, 2651 2696 { name = "transformers" }, 2652 - { name = "triton", marker = "sys_platform == 'linux'" }, 2653 - { name = "triton-windows", marker = "sys_platform == 'win32'" }, 2697 + { name = "triton", marker = "'linux' in sys_platform" }, 2654 2698 { name = "trl" }, 2699 + { name = "typing-extensions" }, 2655 2700 { name = "tyro" }, 2656 2701 { name = "wheel" }, 2657 2702 ] 2658 - sdist = { url = "https://files.pythonhosted.org/packages/1d/37/2471cdc77fc503217be205e8c9d072c7300c1b6ba92968c4f5784ae4e413/unsloth_zoo-2025.4.4.tar.gz", hash = "sha256:cb005df07fa90a68f37c24bc769780fbbdc5f30fcb6326297b29d710472c3cd3", size = 115715, upload-time = "2025-05-01T14:40:19.212Z" } 2703 + sdist = { url = "https://files.pythonhosted.org/packages/a4/ea/a0b38fc3977526513905b1135f4dcde994b2f5a5c712ed83121c8d4af1a1/unsloth_zoo-2026.4.6.tar.gz", hash = "sha256:78083d47774ef7efee8e9cb3e211a6a70c2746b3080c6cd1f0b2ba1c08199a0e", size = 385361, upload-time = "2026-04-09T14:45:59.712Z" } 2659 2704 wheels = [ 2660 - { url = "https://files.pythonhosted.org/packages/90/f1/701393f74a1c9593097123be16544e2df2b464cc058f82880906090d5de2/unsloth_zoo-2025.4.4-py3-none-any.whl", hash = "sha256:e00eaa5e8581eaa228160549ee0af5adc569f4e536cc79190ab83b1829358438", size = 129048, upload-time = "2025-05-01T14:40:17.634Z" }, 2705 + { url = "https://files.pythonhosted.org/packages/51/23/bb59f2c00e25dbfefe65d636429658769c8e5fac8069dcaccd22a03238ce/unsloth_zoo-2026.4.6-py3-none-any.whl", hash = "sha256:326651efcb60d6124f702dd07cde3bfb85ad196ee6293a0deda9b76c4b3ee4ff", size = 418416, upload-time = "2026-04-09T14:45:58.387Z" }, 2661 2706 ] 2662 2707 2663 2708 [[package]]