this repo has no description
1"""
2Export fine-tuned model to GGUF Q8_0 for llama-cli inference.
3
4Output: gemma4-math-ocr-Q8_0.gguf (in current directory)
5
Usage: uv run export [--checkpoint checkpoints/hnm/final]
                     [--quant q8_0] [--out gemma4-math-ocr]
8"""
9
10import argparse
11from unsloth import FastVisionModel
12
13
def main() -> None:
    """Parse the command line, load the checkpoint, and export it to GGUF.

    Command-line options (all optional):
        --checkpoint  passed to ``FastVisionModel.from_pretrained``
                      (default ``checkpoints/hnm/final``).
        --quant       forwarded as ``quantization_method`` to
                      ``save_pretrained_gguf`` (default ``q8_0``).
        --out         first argument to ``save_pretrained_gguf``
                      (default ``gemma4-math-ocr``).

    After the export call returns, a confirmation line of the form
    ``Exported to <out>-<QUANT>.gguf`` is printed.
    """
    # Flag/default pairs, registered in this order so --help output is stable.
    option_defaults = (
        ("--checkpoint", "checkpoints/hnm/final"),
        ("--quant", "q8_0"),
        ("--out", "gemma4-math-ocr"),
    )
    cli = argparse.ArgumentParser()
    for flag, default in option_defaults:
        cli.add_argument(flag, default=default)
    args = cli.parse_args()

    # The checkpoint is loaded with load_in_4bit=True before being exported.
    loaded = FastVisionModel.from_pretrained(args.checkpoint, load_in_4bit=True)
    model, processor = loaded
    model.save_pretrained_gguf(
        args.out,
        processor,
        quantization_method=args.quant,
    )
    print(f"Exported to {args.out}-{args.quant.upper()}.gguf")
24
25
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()