Installs pre-commit hooks that automatically run `dune fmt` in OCaml projects
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

precommit: expand AI attribution detection and rewriting

- Fix sed quoting bug (use double quotes for msg-filter, single for sed pattern)
- Add detection and rewriting of commits authored by Claude/Claude Code
- Rewrite AI authors to current git user via --env-filter

+109 -24
+109 -24
lib/precommit.ml
··· 243 243 244 244 type ai_commit = { hash : string; subject : string } 245 245 246 + (* Patterns that indicate AI-generated content in commit messages *) 247 + let ai_message_patterns = 248 + [ 249 + "Co-Authored-By.*[Cc]laude"; 250 + "https://claude\\.ai/"; 251 + "Generated with.*[Cc]laude"; 252 + ] 253 + 254 + (* Author names that indicate AI-authored commits *) 255 + let ai_author_patterns = 256 + [ "Claude"; "Claude Code"; "Claude Opus"; "Claude Sonnet" ] 257 + 258 + let parse_commit_line line = 259 + if String.length line > 8 then 260 + let hash = String.sub line 0 7 in 261 + let subject = String.sub line 8 (String.length line - 8) in 262 + Some { hash; subject } 263 + else None 264 + 246 265 let check_ai_attribution ~process_mgr ~fs dir = 247 266 if not (file_exists ~fs (Filename.concat dir ".git")) then [] 248 267 else 249 - (* Get commits by the configured user that contain AI attribution patterns. 250 - Use run_in_dir_opt to handle repos with no commits gracefully. *) 251 - let cmd = 252 - "git log --format='%h %s' --grep='Co-Authored-By.*[Cc]laude' \ 253 - --author=\"$(git config user.name)\" 2>/dev/null" 268 + (* Find commits with AI attribution in message body *) 269 + let grep_args = 270 + ai_message_patterns 271 + |> List.map (fun p -> "--grep='" ^ p ^ "'") 272 + |> String.concat " " 273 + in 274 + let msg_cmd = 275 + Printf.sprintf 276 + "git log --format='%%h %%s' %s --author=\"$(git config user.name)\" \ 277 + 2>/dev/null" 278 + grep_args 279 + in 280 + let msg_commits = 281 + match run_in_dir_opt ~process_mgr ~fs dir msg_cmd with 282 + | Error _ -> [] 283 + | Ok lines -> List.filter_map parse_commit_line lines 284 + in 285 + (* Find commits authored by AI *) 286 + let author_commits = 287 + ai_author_patterns 288 + |> List.concat_map (fun author -> 289 + let cmd = 290 + Printf.sprintf 291 + "git log --format='%%h %%s' --author='%s' 2>/dev/null" author 292 + in 293 + match run_in_dir_opt ~process_mgr ~fs dir cmd with 294 + | Error _ -> [] 295 + | 
Ok lines -> List.filter_map parse_commit_line lines) 254 296 in 255 - match run_in_dir_opt ~process_mgr ~fs dir cmd with 256 - | Error _ -> [] (* No commits or command failed *) 257 - | Ok lines -> 258 - List.filter_map 259 - (fun line -> 260 - if String.length line > 8 then 261 - let hash = String.sub line 0 7 in 262 - let subject = String.sub line 8 (String.length line - 8) in 263 - Some { hash; subject } 264 - else None) 265 - lines 297 + (* Combine and deduplicate by hash *) 298 + let all = msg_commits @ author_commits in 299 + let seen = Hashtbl.create 16 in 300 + List.filter 301 + (fun c -> 302 + if Hashtbl.mem seen c.hash then false 303 + else ( 304 + Hashtbl.add seen c.hash (); 305 + true)) 306 + all 266 307 267 308 let current_branch ~process_mgr ~fs dir = 268 309 match ··· 295 336 if not (file_exists ~fs (Filename.concat dir ".git")) then 296 337 Error (Printf.sprintf "%s: No .git directory found" dir) 297 338 else 339 + (* Build sed command to delete all AI attribution patterns from messages *) 340 + let sed_args = 341 + ai_message_patterns 342 + |> List.map (fun p -> "-e '/" ^ p ^ "/d'") 343 + |> String.concat " " 344 + in 345 + (* Build env-filter to replace AI authors with current user *) 346 + let author_conditions = 347 + ai_author_patterns 348 + |> List.map (fun a -> Printf.sprintf "\"$GIT_AUTHOR_NAME\" = '%s'" a) 349 + |> String.concat " -o " 350 + in 351 + let env_filter = 352 + Printf.sprintf 353 + "if [ %s ]; then export GIT_AUTHOR_NAME=\"$(git config user.name)\"; \ 354 + export GIT_AUTHOR_EMAIL=\"$(git config user.email)\"; export \ 355 + GIT_COMMITTER_NAME=\"$(git config user.name)\"; export \ 356 + GIT_COMMITTER_EMAIL=\"$(git config user.email)\"; fi" 357 + author_conditions 358 + in 298 359 let cmd = 299 - "FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --msg-filter \"sed \ 300 - '/[Cc]o-[Aa]uthored-[Bb]y:.*[Cc]laude/d'\" -- HEAD 2>&1" 360 + Printf.sprintf 361 + "FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --env-filter \ 362 
+ '%s' --msg-filter \"sed %s\" -- HEAD 2>&1" 363 + env_filter sed_args 301 364 in 302 365 match run_in_dir_opt ~process_mgr ~fs dir cmd with 303 366 | Error e -> Error (Printf.sprintf "%s: %s" dir e) 304 367 | Ok _lines -> 305 - (* Count how many commits were actually rewritten *) 306 - let count_cmd = 307 - "git log --format='%H' HEAD --grep='Co-Authored-By.*[Cc]laude' \ 308 - 2>/dev/null | wc -l" 368 + (* Count commits still with AI attribution in messages *) 369 + let grep_args = 370 + ai_message_patterns 371 + |> List.map (fun p -> "--grep='" ^ p ^ "'") 372 + |> String.concat " " 373 + in 374 + let msg_count_cmd = 375 + Printf.sprintf "git log --format='%%H' HEAD %s 2>/dev/null | wc -l" 376 + grep_args 309 377 in 310 - let remaining = 311 - match run_in_dir_opt ~process_mgr ~fs dir count_cmd with 378 + let msg_remaining = 379 + match run_in_dir_opt ~process_mgr ~fs dir msg_count_cmd with 312 380 | Ok (n :: _) -> ( try int_of_string (String.trim n) with _ -> 0) 313 381 | _ -> 0 314 382 in 383 + (* Count commits still authored by AI *) 384 + let author_remaining = 385 + ai_author_patterns 386 + |> List.fold_left 387 + (fun acc author -> 388 + let cmd = 389 + Printf.sprintf 390 + "git log --format='%%H' --author='%s' 2>/dev/null | wc -l" 391 + author 392 + in 393 + match run_in_dir_opt ~process_mgr ~fs dir cmd with 394 + | Ok (n :: _) -> ( 395 + try acc + int_of_string (String.trim n) with _ -> acc) 396 + | _ -> acc) 397 + 0 398 + in 399 + let remaining = msg_remaining + author_remaining in 315 400 (* Clean up refs/original *) 316 401 let _ = 317 402 run_in_dir_opt ~process_mgr ~fs dir