Installs pre-commit hooks for OCaml projects that run dune fmt automatically
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Replace git CLI with ocaml-git for local operations

precommit:
- Use ocaml-git for check_ai_attribution, rewrite_ai_attribution
- Use ocaml-git for current_branch, backup_branch
- Remove unused ~process_mgr parameter from API
- Only check/rewrite commits from the current git user (by email)

forks:
- Use Git.Repository.get_remote_url for remote_exists
- Use Git.Repository.add_remote for add_remote
- Use Git.Repository.read_ref for get_ref_commit
- Use Git.Rev_list for is_ancestor, merge_base, count_commits_between

Network operations (git fetch) remain as CLI calls since ocaml-git
doesn't support network I/O.

+163 -250
+11 -13
bin/main.ml
··· 219 219 220 220 (* Shared: find AI commits across dirs and display a unified table. 221 221 Returns [(affected_dirs, total_commits, repos_with_issues)]. *) 222 - let find_and_display_ai_commits ~process_mgr ~fs dirs = 222 + let find_and_display_ai_commits ~fs dirs = 223 223 let term_width = get_terminal_width () in 224 224 let subject_max = max 20 (term_width - 35) in 225 225 let total_commits = ref 0 in ··· 228 228 let affected_dirs = ref [] in 229 229 List.iter 230 230 (fun d -> 231 - let commits = Precommit.check_ai_attribution ~process_mgr ~fs d in 231 + let commits = Precommit.check_ai_attribution ~fs d in 232 232 if commits <> [] then begin 233 233 incr repos_with_issues; 234 234 total_commits := !total_commits + List.length commits; ··· 271 271 end; 272 272 (List.rev !affected_dirs, !total_commits, !repos_with_issues) 273 273 274 - let check_impl ~process_mgr ~fs dirs = 274 + let check_impl ~fs dirs = 275 275 let dirs = collect_dirs ~fs dirs in 276 276 let _affected, total_commits, repos_with_issues = 277 - find_and_display_ai_commits ~process_mgr ~fs dirs 277 + find_and_display_ai_commits ~fs dirs 278 278 in 279 279 if total_commits > 0 then begin 280 280 error "%d commit%s with AI attribution in %d repo%s" total_commits ··· 288 288 let check dirs = 289 289 Eio_main.run @@ fun env -> 290 290 let fs = Eio.Stdenv.cwd env in 291 - let process_mgr = Eio.Stdenv.process_mgr env in 292 - check_impl ~process_mgr ~fs dirs 291 + check_impl ~fs dirs 293 292 294 293 let check_cmd = 295 294 let doc = "Check git history for commits with AI attribution." in ··· 332 331 let answer = String.trim line in 333 332 answer = "y" || answer = "Y" 334 333 335 - let fix_impl ~process_mgr ~fs dry_run yes dirs = 334 + let fix_impl ~fs dry_run yes dirs = 336 335 let dirs = collect_dirs ~fs dirs in 337 336 let affected, total_commits, repos_with_issues = 338 - find_and_display_ai_commits ~process_mgr ~fs dirs 337 + find_and_display_ai_commits ~fs dirs 339 338 in 340 339 if total_commits = 0 then success "No AI attribution found in commit history" 341 340 else if dry_run then begin ··· 345 344 (if repos_with_issues = 1 then "" else "s"); 346 345 List.iter 347 346 (fun d -> 348 - let branch = Precommit.current_branch ~process_mgr ~fs d in 347 + let branch = Precommit.current_branch ~fs d in 349 348 let name = Option.value ~default:"HEAD" branch in 350 349 info "Would backup %s:%s before rewriting" d name) 351 350 affected ··· 361 360 Eio.Fiber.all 362 361 (List.map 363 362 (fun d () -> 364 - let backup = Precommit.backup_branch ~process_mgr ~fs d in 363 + let backup = Precommit.backup_branch ~fs d in 365 364 success "%s: backed up to %s" d backup; 366 - match Precommit.rewrite_ai_attribution ~process_mgr ~fs d with 365 + match Precommit.rewrite_ai_attribution ~fs d with 367 366 | Ok _ -> 368 367 Atomic.incr fixed; 369 368 success "%s: attribution removed" d ··· 391 390 let fix dry_run yes dirs = 392 391 Eio_main.run @@ fun env -> 393 392 let fs = Eio.Stdenv.cwd env in 394 - let process_mgr = Eio.Stdenv.process_mgr env in 395 - fix_impl ~process_mgr ~fs dry_run yes dirs 393 + fix_impl ~fs dry_run yes dirs 396 394 397 395 let fix_cmd = 398 396 let doc = "Remove AI attribution from commit history." in
+1 -1
lib/dune
··· 1 1 (library 2 2 (name precommit) 3 3 (public_name precommit) 4 - (libraries eio unix re)) 4 + (libraries eio unix re git))
+137 -216
lib/precommit.ml
··· 223 223 in 224 224 self @ children 225 225 226 - let run_in_dir ~process_mgr ~fs dir cmd = 227 - let cwd = Eio.Path.(fs / dir) in 228 - let output = 229 - Eio.Process.parse_out process_mgr Eio.Buf_read.take_all ~cwd 230 - [ "/bin/sh"; "-c"; cmd ] 231 - in 232 - output |> String.split_on_char '\n' |> List.filter (fun s -> s <> "") 226 + type ai_commit = { hash : string; subject : string } 233 227 234 - let run_in_dir_opt ~process_mgr ~fs dir cmd = 235 - let cwd = Eio.Path.(fs / dir) in 236 - try 237 - let output = 238 - Eio.Process.parse_out process_mgr Eio.Buf_read.take_all ~cwd 239 - [ "/bin/sh"; "-c"; cmd ] 240 - in 241 - Ok (output |> String.split_on_char '\n' |> List.filter (fun s -> s <> "")) 242 - with Eio.Io _ as e -> Error (Printexc.to_string e) 228 + (* AI attribution pattern for filtering commit messages *) 229 + let ai_pattern = 230 + Re.compile (Re.Pcre.re {|[Cc]o-[Aa]uthored-[Bb]y:.*[Cc]laude.*\n?|}) 243 231 244 - type ai_commit = { hash : string; subject : string } 232 + let filter_message msg = Re.replace_string ai_pattern ~by:"" msg 233 + let message_has_ai_attribution msg = Re.execp ai_pattern msg 245 234 246 - (* Patterns that indicate AI-generated content in commit messages *) 247 - let ai_message_patterns = 248 - [ 249 - "Co-Authored-By.*[Cc]laude"; 250 - "https://claude\\.ai/"; 251 - "Generated with.*[Cc]laude"; 252 - ] 235 + (* Get current user's email from git config *) 236 + let get_user_email repo = 237 + match Git.Repository.read_config repo with 238 + | Some config -> (Git.Config.get_user config).email 239 + | None -> None 253 240 254 - (* Author names that indicate AI-authored commits *) 255 - let ai_author_patterns = 256 - [ "Claude"; "Claude Code"; "Claude Opus"; "Claude Sonnet" ] 241 + (* Check if commit was made by the current user *) 242 + let is_my_commit ~user_email commit = 243 + match user_email with 244 + | None -> true (* If no user configured, include all commits *) 245 + | Some email -> 246 + let committer_email = Git.User.email (Git.Commit.committer commit) in 247 + String.equal email committer_email 257 248 258 - let parse_commit_line line = 259 - if String.length line > 8 then 260 - let hash = String.sub line 0 7 in 261 - let subject = String.sub line 8 (String.length line - 8) in 262 - Some { hash; subject } 263 - else None 264 - 265 - let check_ai_attribution ~process_mgr ~fs dir = 249 + let check_ai_attribution ~fs dir = 266 250 if not (file_exists ~fs (Filename.concat dir ".git")) then [] 267 251 else 268 - (* Find commits with AI attribution in message body *) 269 - let grep_args = 270 - ai_message_patterns 271 - |> List.map (fun p -> "--grep='" ^ p ^ "'") 272 - |> String.concat " " 273 - in 274 - let msg_cmd = 275 - Printf.sprintf 276 - "git log --format='%%h %%s' %s --author=\"$(git config user.name)\" \ 277 - 2>/dev/null" 278 - grep_args 279 - in 280 - let msg_commits = 281 - match run_in_dir_opt ~process_mgr ~fs dir msg_cmd with 282 - | Error _ -> [] 283 - | Ok lines -> List.filter_map parse_commit_line lines 284 - in 285 - (* Find commits authored by AI *) 286 - let author_commits = 287 - ai_author_patterns 288 - |> List.concat_map (fun author -> 289 - let cmd = 290 - Printf.sprintf 291 - "git log --format='%%h %%s' --author='%s' 2>/dev/null" author 292 - in 293 - match run_in_dir_opt ~process_mgr ~fs dir cmd with 252 + let repo = Git.Repository.open_repo ~fs (Fpath.v dir) in 253 + let user_email = get_user_email repo in 254 + match Git.Repository.head repo with 255 + | None -> [] 256 + | Some head_hash -> 257 + (* Get all commits from HEAD *) 258 + let commits = 259 + match 260 + Git.Rev_list.topo_sort_reverse repo head_hash ~stop:(fun _ -> false) 261 + with 262 + | Ok cs -> cs 294 263 | Error _ -> [] 295 - | Ok lines -> List.filter_map parse_commit_line lines) 296 - in 297 - (* Combine and deduplicate by hash *) 298 - let all = msg_commits @ author_commits in 299 - let seen = Hashtbl.create 16 in 300 - List.filter 301 - (fun c -> 302 - if Hashtbl.mem seen c.hash then false 303 - else ( 304 - Hashtbl.add seen c.hash (); 305 - true)) 306 - all 264 + in 265 + (* Find commits with AI attribution in message, only from current user *) 266 + List.filter_map 267 + (fun (info : Git.Rev_list.commit_info) -> 268 + match Git.Repository.read repo info.hash with 269 + | Ok (Git.Value.Commit c) when is_my_commit ~user_email c -> ( 270 + match Git.Commit.message c with 271 + | Some msg when message_has_ai_attribution msg -> 272 + let hash = String.sub (Git.Hash.to_hex info.hash) 0 7 in 273 + let subject = 274 + match String.split_on_char '\n' msg with 275 + | subj :: _ -> String.trim subj 276 + | [] -> "" 277 + in 278 + Some { hash; subject } 279 + | _ -> None) 280 + | _ -> None) 281 + commits 307 282 308 - let current_branch ~process_mgr ~fs dir = 309 - match 310 - run_in_dir_opt ~process_mgr ~fs dir 311 - "git symbolic-ref --short HEAD 2>/dev/null" 312 - with 313 - | Ok (branch :: _) -> Some branch 314 - | _ -> None 283 + let current_branch ~fs dir = 284 + if not (file_exists ~fs (Filename.concat dir ".git")) then None 285 + else 286 + let repo = Git.Repository.open_repo ~fs (Fpath.v dir) in 287 + Git.Repository.current_branch repo 315 288 316 - let backup_branch ~process_mgr ~fs dir = 289 + let backup_branch ~fs dir = 290 + let repo = Git.Repository.open_repo ~fs (Fpath.v dir) in 317 291 let branch = 318 - match current_branch ~process_mgr ~fs dir with 319 - | Some b -> b 320 - | None -> "HEAD" 292 + match Git.Repository.current_branch repo with Some b -> b | None -> "HEAD" 321 293 in 322 294 let now = Unix.gettimeofday () in 323 295 let tm = Unix.localtime now in ··· 326 298 (tm.tm_mon + 1) tm.tm_mday tm.tm_hour tm.tm_min tm.tm_sec 327 299 in 328 300 let backup_name = Printf.sprintf "backup/%s-before-fix-%s" branch timestamp in 329 - let _lines = 330 - run_in_dir ~process_mgr ~fs dir 331 - (Printf.sprintf "git branch %s" (Filename.quote backup_name)) 332 - in 333 - backup_name 301 + match Git.Repository.head repo with 302 + | Some hash -> 303 + Git.Repository.write_ref repo ("refs/heads/" ^ backup_name) hash; 304 + backup_name 305 + | None -> backup_name 334 306 335 - let find_oldest_ai_commit ~process_mgr ~fs dir = 336 - let grep_args = 337 - ai_message_patterns 338 - |> List.map (fun p -> "--grep='" ^ p ^ "'") 339 - |> String.concat " " 340 - in 341 - let msg_cmd = 342 - Printf.sprintf "git log --format='%%H' --reverse %s 2>/dev/null | head -1" 343 - grep_args 344 - in 345 - let author_cmd = 346 - ai_author_patterns 347 - |> List.map (fun a -> 348 - Printf.sprintf 349 - "git log --format='%%H' --reverse --author='%s' 2>/dev/null | head -1" 350 - a) 351 - |> String.concat "; " 352 - in 353 - let msg_oldest = 354 - match run_in_dir_opt ~process_mgr ~fs dir msg_cmd with 355 - | Ok (h :: _) when h <> "" -> Some h 356 - | _ -> None 357 - in 358 - let author_oldest = 359 - match run_in_dir_opt ~process_mgr ~fs dir author_cmd with 360 - | Ok lines -> List.find_opt (fun h -> h <> "") lines 361 - | _ -> None 362 - in 363 - match (msg_oldest, author_oldest) with 364 - | Some m, Some a -> ( 365 - (* Find which is older by checking commit order *) 366 - let cmd = 367 - Printf.sprintf 368 - "git merge-base --is-ancestor %s %s && echo %s || echo %s" m a m a 369 - in 370 - match run_in_dir_opt ~process_mgr ~fs dir cmd with 371 - | Ok (h :: _) -> Some h 372 - | _ -> Some m) 373 - | Some h, None | None, Some h -> Some h 374 - | None, None -> None 375 - 376 - let rewrite_ai_attribution ~process_mgr ~fs dir = 307 + let rewrite_ai_attribution ~fs dir = 377 308 if not (file_exists ~fs (Filename.concat dir ".git")) then 378 309 Error (Printf.sprintf "%s: No .git directory found" dir) 379 310 else 380 - (* Find the oldest commit with AI attribution to limit rewrite scope *) 381 - let commit_range = 382 - match find_oldest_ai_commit ~process_mgr ~fs dir with 383 - | Some oldest -> Printf.sprintf "%s^..HEAD" oldest 384 - | None -> "HEAD" 385 - in 386 - (* Build sed command to delete all AI attribution patterns from messages. 387 - Escape forward slashes in patterns for sed compatibility. *) 388 - let escape_slashes s = 389 - let buf = Buffer.create (String.length s) in 390 - String.iter 391 - (fun c -> 392 - if c = '/' then Buffer.add_string buf "\\/" else Buffer.add_char buf c) 393 - s; 394 - Buffer.contents buf 395 - in 396 - let sed_args = 397 - ai_message_patterns 398 - |> List.map (fun p -> "-e '/" ^ escape_slashes p ^ "/d'") 399 - |> String.concat " " 400 - in 401 - (* Build env-filter to replace AI authors with current user *) 402 - let author_conditions = 403 - ai_author_patterns 404 - |> List.map (fun a -> Printf.sprintf "\"$GIT_AUTHOR_NAME\" = \"%s\"" a) 405 - |> String.concat " -o " 406 - in 407 - let env_filter = 408 - Printf.sprintf 409 - "if [ %s ]; then export GIT_AUTHOR_NAME=\"$(git config user.name)\"; \ 410 - export GIT_AUTHOR_EMAIL=\"$(git config user.email)\"; export \ 411 - GIT_COMMITTER_NAME=\"$(git config user.name)\"; export \ 412 - GIT_COMMITTER_EMAIL=\"$(git config user.email)\"; fi" 413 - author_conditions 414 - in 415 - let cmd = 416 - Printf.sprintf 417 - "FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --env-filter \ 418 - '%s' --msg-filter \"sed %s\" -- %s 2>&1" 419 - env_filter sed_args commit_range 420 - in 421 - match run_in_dir_opt ~process_mgr ~fs dir cmd with 422 - | Error e -> Error (Printf.sprintf "%s: %s" dir e) 423 - | Ok _lines -> 424 - (* Count commits still with AI attribution in messages *) 425 - let grep_args = 426 - ai_message_patterns 427 - |> List.map (fun p -> "--grep='" ^ p ^ "'") 428 - |> String.concat " " 429 - in 430 - let msg_count_cmd = 431 - Printf.sprintf "git log --format='%%H' HEAD %s 2>/dev/null | wc -l" 432 - grep_args 433 - in 434 - let msg_remaining = 435 - match run_in_dir_opt ~process_mgr ~fs dir msg_count_cmd with 436 - | Ok (n :: _) -> ( try int_of_string (String.trim n) with _ -> 0) 437 - | _ -> 0 438 - in 439 - (* Count commits still authored by AI *) 440 - let author_remaining = 441 - ai_author_patterns 442 - |> List.fold_left 443 - (fun acc author -> 444 - let cmd = 445 - Printf.sprintf 446 - "git log --format='%%H' --author='%s' 2>/dev/null | wc -l" 447 - author 448 - in 449 - match run_in_dir_opt ~process_mgr ~fs dir cmd with 450 - | Ok (n :: _) -> ( 451 - try acc + int_of_string (String.trim n) with _ -> acc) 452 - | _ -> acc) 453 - 0 454 - in 455 - let remaining = msg_remaining + author_remaining in 456 - (* Clean up refs/original *) 457 - let _ = 458 - run_in_dir_opt ~process_mgr ~fs dir 459 - "git for-each-ref --format='%(refname)' refs/original/ | xargs -r \ 460 - -n1 git update-ref -d 2>/dev/null; true" 311 + let repo = Git.Repository.open_repo ~fs (Fpath.v dir) in 312 + let user_email = get_user_email repo in 313 + match Git.Repository.head repo with 314 + | None -> Ok 0 315 + | Some head_hash -> 316 + (* Get all commits from HEAD in topological order (oldest first) *) 317 + let commits = 318 + match 319 + Git.Rev_list.topo_sort_reverse repo head_hash ~stop:(fun _ -> false) 320 + with 321 + | Ok cs -> cs 322 + | Error _ -> [] 461 323 in 462 - if remaining = 0 then Ok 0 324 + (* Find which commits need rewriting (only from current user) *) 325 + let needs_rewrite = Hashtbl.create 64 in 326 + List.iter 327 + (fun (info : Git.Rev_list.commit_info) -> 328 + match Git.Repository.read repo info.hash with 329 + | Ok (Git.Value.Commit c) when is_my_commit ~user_email c -> ( 330 + match Git.Commit.message c with 331 + | Some msg when message_has_ai_attribution msg -> 332 + Hashtbl.add needs_rewrite info.hash true 333 + | _ -> ()) 334 + | _ -> ()) 335 + commits; 336 + if Hashtbl.length needs_rewrite = 0 then Ok 0 463 337 else 464 - Error 465 - (Printf.sprintf "%s: %d commits still have attribution" dir 466 - remaining) 338 + (* Rewrite commits, building mapping of old -> new hashes *) 339 + let hash_map = Hashtbl.create 64 in 340 + List.iter 341 + (fun (info : Git.Rev_list.commit_info) -> 342 + match Git.Repository.read repo info.hash with 343 + | Ok (Git.Value.Commit c) -> 344 + (* Update parent references *) 345 + let new_parents = 346 + List.map 347 + (fun p -> 348 + match Hashtbl.find_opt hash_map p with 349 + | Some new_p -> new_p 350 + | None -> p) 351 + (Git.Commit.parents c) 352 + in 353 + let parents_changed = 354 + not 355 + (List.equal Git.Hash.equal new_parents 356 + (Git.Commit.parents c)) 357 + in 358 + let msg = Git.Commit.message c in 359 + let needs_msg_rewrite = 360 + match msg with 361 + | Some m -> message_has_ai_attribution m 362 + | None -> false 363 + in 364 + if parents_changed || needs_msg_rewrite then begin 365 + let new_msg = 366 + match msg with 367 + | Some m -> Some (filter_message m) 368 + | None -> None 369 + in 370 + let new_commit = 371 + Git.Commit.v ~tree:(Git.Commit.tree c) 372 + ~author:(Git.Commit.author c) 373 + ~committer:(Git.Commit.committer c) ~parents:new_parents 374 + ~extra:(Git.Commit.extra c) new_msg 375 + in 376 + let new_hash = 377 + Git.Repository.write_commit repo new_commit 378 + in 379 + Hashtbl.add hash_map info.hash new_hash 380 + end 381 + | _ -> ()) 382 + commits; 383 + (* Update HEAD to point to the new tip *) 384 + (match Hashtbl.find_opt hash_map head_hash with 385 + | Some new_head -> Git.Repository.advance_head repo new_head 386 + | None -> ()); 387 + Ok (Hashtbl.length needs_rewrite) 467 388 468 389 (* Tabular output helpers *) 469 390
+14 -20
lib/precommit.mli
··· 101 101 (** A commit with AI attribution. *) 102 102 103 103 val check_ai_attribution : 104 - process_mgr:_ Eio.Process.mgr -> fs:_ Eio.Path.t -> string -> ai_commit list 105 - (** [check_ai_attribution ~process_mgr ~fs dir] finds commits by the configured 106 - git user that contain "claude" in the commit message. *) 104 + fs:Eio.Fs.dir_ty Eio.Path.t -> string -> ai_commit list 105 + (** [check_ai_attribution ~fs dir] uses ocaml-git to find commits that contain 106 + AI attribution patterns in the commit message. *) 107 107 108 108 (** {1 History Rewriting} *) 109 109 110 - val current_branch : 111 - process_mgr:_ Eio.Process.mgr -> fs:_ Eio.Path.t -> string -> string option 112 - (** [current_branch ~process_mgr ~fs dir] returns the current branch name, or 113 - [None] if HEAD is detached. *) 110 + val current_branch : fs:Eio.Fs.dir_ty Eio.Path.t -> string -> string option 111 + (** [current_branch ~fs dir] returns the current branch name, or [None] if HEAD 112 + is detached. Uses ocaml-git. *) 114 113 115 - val backup_branch : 116 - process_mgr:_ Eio.Process.mgr -> fs:_ Eio.Path.t -> string -> string 117 - (** [backup_branch ~process_mgr ~fs dir] creates a backup branch named 118 - [backup/<branch>-before-fix-<timestamp>] and returns the backup name. *) 114 + val backup_branch : fs:Eio.Fs.dir_ty Eio.Path.t -> string -> string 115 + (** [backup_branch ~fs dir] creates a backup branch named 116 + [backup/<branch>-before-fix-<timestamp>] and returns the backup name. Uses 117 + ocaml-git. *) 119 118 120 119 val rewrite_ai_attribution : 121 - process_mgr:_ Eio.Process.mgr -> 122 - fs:_ Eio.Path.t -> 123 - string -> 124 - (int, string) result 125 - (** [rewrite_ai_attribution ~process_mgr ~fs dir] uses [git filter-branch] to 126 - remove [Co-Authored-By:.*claude] lines from all commit messages on the 127 - current branch. Returns [Ok remaining] where [remaining] is the number of 128 - commits that still have attribution (0 on full success), or [Error msg] on 129 - failure. Cleans up [refs/original] after rewriting. *) 120 + fs:Eio.Fs.dir_ty Eio.Path.t -> string -> (int, string) result 121 + (** [rewrite_ai_attribution ~fs dir] uses ocaml-git to rewrite commits and 122 + remove [Co-Authored-By:.*claude] lines from commit messages. Returns [Ok n] 123 + where [n] is the number of commits rewritten, or [Error msg] on failure. *)