The unpac monorepo manager self-hosting as a monorepo using unpac
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Use a rope of string array instead of a list of strings (#37)

* Use a rope of string array instead of a list of strings

This improves my usecase (a single file diffed with 2000 hunks by a factor of 20

* avoid physical equality, avoid polymorphic equality

* add test case "many-hunks" (from external)

---------

Co-authored-by: Kate <kit-ty-kate@exn.st>

authored by

Hannes Mehnert
Kate
and committed by
GitHub
c3897818 516e2b90

+192 -29
+8 -4
vendor/opam/patch/README.md
··· 3 3 The loosely specified `diff` file format is widely used for transmitting 4 4 differences of line-based information. The motivating example is 5 5 [`opam`](https://opam.ocaml.org), which is able to validate updates being 6 - cryptographically signed (e.g. [conex](https://github.com/hannesm/conex)) by 6 + cryptographically signed (e.g. [conex](https://github.com/robur-coop/conex)) by 7 7 providing a unified diff. 8 8 9 9 The [test-based infered specification](https://www.artima.com/weblogs/viewpost.jsp?thread=164293) ··· 50 50 to 1. NB from practical experiments, only "+1" and "-1" are supported. 51 51 52 52 ```OCaml 53 + type git_ext = 54 + | Rename_only of string * string 55 + | Delete_only 56 + | Create_only 57 + 53 58 type operation = 54 59 | Edit of string * string 55 60 | Delete of string 56 61 | Create of string 57 - | Rename_only of string * string 62 + | Git_ext of (string * string * git_ext) 58 63 59 64 type hunk (* positions and contents *) 60 65 ··· 73 78 74 79 The function `patch` assumes that the patch applies cleanly, and does not 75 80 check this assumption. Exceptions may be raised if this assumption is violated. 76 - The git diff format allows further features, such as file permissions, and also 77 - a "copy from / to" header, which I was unable to spot in the wild. 81 + The git diff format allows further features, such as file permissions. 78 82 79 83 ## Installation 80 84
+1 -1
vendor/opam/patch/src/dune
··· 2 2 (name patch) 3 3 (synopsis "Patch purely in OCaml") 4 4 (public_name patch) 5 - (modules patch lib fname)) 5 + (modules patch lib fname rope)) 6 6 7 7 (executable 8 8 (name patch_command)
-8
vendor/opam/patch/src/lib.ml
··· 57 57 | [] -> invalid_arg "List.last" 58 58 | [x] -> x 59 59 | _::xs -> last xs 60 - 61 - let rev_cut idx l = 62 - let rec aux acc idx = function 63 - | l when idx = 0 -> (acc, l) 64 - | [] -> invalid_arg "List.cut" 65 - | x::xs -> aux (x :: acc) (idx - 1) xs 66 - in 67 - aux [] idx l 68 60 end
-1
vendor/opam/patch/src/lib.mli
··· 9 9 10 10 module List : sig 11 11 val last : 'a list -> 'a 12 - val rev_cut : int -> 'a list -> 'a list * 'a list 13 12 end
+18 -15
vendor/opam/patch/src/patch.ml
··· 36 36 hunk.mine_start hunk.mine_len hunk.their_start hunk.their_len 37 37 (unified_diff ~mine_no_nl ~their_no_nl hunk) 38 38 39 - let rec apply_hunk ~cleanly ~fuzz (last_matched_line, offset, lines) ({mine_start; mine_len; mine; their_start = _; their_len; their} as hunk) = 39 + let rec apply_hunk ~cleanly ~fuzz (last_matched_line, offset, rope) ({mine_start; mine_len; mine; their_start = _; their_len; their} as hunk) = 40 40 let mine_start = mine_start + offset in 41 41 let patch_match ~search_offset = 42 42 let mine_start = mine_start + search_offset in 43 - let rev_prefix, rest = Lib.List.rev_cut (Stdlib.max 0 (mine_start - 1)) lines in 44 - let rev_actual_mine, suffix = Lib.List.rev_cut mine_len rest in 45 - let actual_mine = List.rev rev_actual_mine in 46 - if actual_mine <> (mine : string list) then 47 - invalid_arg "unequal mine"; 48 - (* TODO: should we check their_len against List.length their? *) 43 + let off_mine = Stdlib.max 0 (mine_start - 1) in 44 + let prefix = Rope.chop rope off_mine in 45 + let actual_mine = Rope.chop rope ~off:off_mine mine_len in 46 + let off = off_mine + mine_len in 47 + let suffix = Rope.shift rope off in 48 + if not (Rope.equal_to_string_list actual_mine mine) then 49 + invalid_arg "unequal mine"; 50 + let theirs = 51 + let nl = Rope.last_is_nl actual_mine in 52 + Rope.of_strings their nl 53 + in 49 54 (mine_start + mine_len, offset + (their_len - mine_len), 50 - (* TODO: Replace rev_append (rev ...) by the tail-rec when patch 51 - requires OCaml >= 4.14 *) 52 - List.rev_append rev_prefix (List.rev_append (List.rev their) suffix)) 55 + Rope.concat prefix (Rope.concat theirs suffix)) 53 56 in 54 57 try patch_match ~search_offset:0 55 58 with Invalid_argument _ -> 56 59 if cleanly then 57 60 invalid_arg "apply_hunk" 58 61 else 59 - let max_pos_offset = Stdlib.max 0 (List.length lines - Stdlib.max 0 (mine_start - 1) - mine_len) in 62 + let max_pos_offset = Stdlib.max 0 (Rope.length rope - Stdlib.max 0 (mine_start - 1) - mine_len) in 60 63 let max_neg_offset = mine_start - last_matched_line in 61 64 let rec locate search_offset = 62 65 let aux search_offset max_offset = ··· 100 103 else if mine_len = (hunk.mine_len : int) && their_len = (hunk.their_len : int) then 101 104 invalid_arg "apply_hunk: could not apply fuzz" 102 105 else 103 - apply_hunk ~cleanly ~fuzz:(fuzz + 1) (last_matched_line, offset, lines) hunk 106 + apply_hunk ~cleanly ~fuzz:(fuzz + 1) (last_matched_line, offset, rope) hunk 104 107 else 105 108 invalid_arg "apply_hunk" 106 109 else ··· 476 479 | _ -> assert false 477 480 end 478 481 | Edit _ -> 479 - let old = match filedata with None -> [] | Some x -> to_lines x in 480 - let _, _, lines = List.fold_left (apply_hunk ~cleanly ~fuzz:0) (0, 0, old) diff.hunks in 481 - let lines = String.concat "\n" lines in 482 + let old = match filedata with None -> Rope.empty | Some x -> Rope.of_string x in 483 + let _, _, rope = List.fold_left (apply_hunk ~cleanly ~fuzz:0) (0, 0, old) diff.hunks in 484 + let lines = Rope.to_string rope in 482 485 let lines = 483 486 match diff.mine_no_nl, diff.their_no_nl with 484 487 | false, true ->
+117
vendor/opam/patch/src/rope.ml
··· 1 + (* Originally from https://github.com/robur-coop/utcp, written by 2 + Calascibetta Romain <romain.calascibetta@gmail.com> *) 3 + 4 + (* A rope data structure where each node is a line *) 5 + 6 + type t = 7 + | Str of string array * bool * int * int 8 + | App of t * t * int 9 + 10 + let length = function 11 + | Str (_, _, len, _) -> len 12 + | App (_, _, len) -> len 13 + 14 + (* keep compatibility with 4.08 *) 15 + let min_int (a : int) (b : int) = min a b 16 + external unsafe_blit_string : string -> int -> bytes -> int -> int -> unit 17 + = "caml_blit_string" [@@noalloc] 18 + 19 + let append t1 t2 = 20 + App (t1, t2, length t1 + length t2) 21 + 22 + let empty = Str (Array.make 0 "", true, 0, 0) 23 + 24 + let rec unsafe_sub t start stop = 25 + if start = 0 && Int.equal stop (length t) then 26 + t 27 + else if Int.equal start stop then 28 + empty 29 + else match t with 30 + | Str (data, nl, len, off) -> 31 + assert (stop <= (len : int)); 32 + Str (data, nl, stop - start, off + start) 33 + | App (l, r, _) -> 34 + let len = length l in 35 + if stop <= (len : int) then unsafe_sub l start stop 36 + else if start >= (len : int) then unsafe_sub r (start - len) (stop - len) 37 + else append (unsafe_sub l start len) (unsafe_sub r 0 (stop - len)) 38 + 39 + let chop t ?(off = 0) len = 40 + if len < 0 || len > (length t - off : int) then 41 + invalid_arg "Rope.chop"; 42 + if len = 0 then empty else unsafe_sub t off (off + len) 43 + 44 + let shift t len = 45 + if len < 0 then 46 + invalid_arg "Rope.shift"; 47 + if len = 0 then 48 + t 49 + else 50 + let max = length t in 51 + let len = min_int max len in 52 + let l = len + (max - len) in 53 + unsafe_sub t len l 54 + 55 + let rec last_is_nl = function 56 + | Str (a, nl, len, off) -> if Int.equal (Array.length a - off) len then nl else true 57 + | App (_, r, _) -> last_is_nl r 58 + 59 + let rec byte_length = function 60 + | Str (s, _, len, off) as a -> 61 + let sum = ref 0 in 62 + for idx = off to len + off - 1 do 63 + let data = Array.unsafe_get s idx in 64 + sum := !sum + String.length data + 1 65 + done; 66 + !sum - if last_is_nl a then 0 else 1 67 + | App (l, r, _) -> byte_length l + byte_length r 68 + 69 + let rec into_bytes buf dst_off = function 70 + | Str (s, _, len, off) as a -> 71 + let off' = ref dst_off in 72 + for idx = off to len + off - 1 do 73 + let data = Array.unsafe_get s idx in 74 + unsafe_blit_string data 0 buf !off' (String.length data); 75 + off' := !off' + String.length data + 1; 76 + if idx - off < (len - 1 : int) || (Int.equal (idx - off) (len - 1) && last_is_nl a) then 77 + Bytes.unsafe_set buf (!off' - 1) '\n' 78 + done 79 + | App (l, r, _) -> 80 + into_bytes buf dst_off l; 81 + into_bytes buf (dst_off + byte_length l) r 82 + 83 + let to_string t = 84 + let len = byte_length t in 85 + let buf = Bytes.create len in 86 + into_bytes buf 0 t; 87 + Bytes.unsafe_to_string buf 88 + 89 + let concat a b = append a b 90 + 91 + let of_strings xs last_is_nl = 92 + let d = Array.of_list xs in 93 + Str (d, last_is_nl, Array.length d, 0) 94 + 95 + let of_string str = 96 + let splitted = String.split_on_char '\n' str in 97 + let last_is_nl = String.unsafe_get str (String.length str - 1) = '\n' in 98 + let d = Array.of_list splitted in 99 + Str (d, last_is_nl, Array.length d - (if last_is_nl then 1 else 0), 0) 100 + 101 + let rec equal_to_string_list t = function 102 + | [] -> length t = 0 103 + | hd :: tl -> 104 + let rec find_data = function 105 + | Str (data, _, len, off) -> 106 + if len > 0 then Some (Array.get data off) else None 107 + | App (l, r, _) -> 108 + if length l > 0 then 109 + find_data l 110 + else 111 + find_data r 112 + in 113 + match find_data t with 114 + | None -> false 115 + | Some data -> 116 + String.equal hd data && 117 + equal_to_string_list (shift t 1) tl
+36
vendor/opam/patch/src/rope.mli
··· 1 + type t (** The type for a rope data structure *) 2 + 3 + val length : t -> int 4 + (** [length t] returns the amount of strings in [t]. *) 5 + 6 + val empty : t 7 + (** [empty] is the empty rope. *) 8 + 9 + val of_strings : string list -> bool -> t 10 + (** [of_strings xs nl] is a rope [t] which contains the strings of [xs]. If 11 + [nl] is true, the last string will have a newline, otherwise not. *) 12 + 13 + val of_string : string -> t 14 + (** [of_string str] will split the string [str] on newline, and return a rope. *) 15 + 16 + val to_string : t -> string 17 + (** [to_string t] is the string where the contents of [t] is present. *) 18 + 19 + val chop : t -> ?off:int -> int -> t 20 + (** [chop t ~off len] returns a new rope that contains [len] strings starting 21 + at [off] of the provided rope [t]. Raises Invalid_argument if [len] and 22 + [off] are not inside the bounds. *) 23 + 24 + val shift : t -> int -> t 25 + (** [shift t len] returns a new rope that does not contain the first [len] 26 + strings, but only the remaining strings of [t]. *) 27 + 28 + val concat : t -> t -> t 29 + (** [concat t t'] returns a new rope which contains [t] followed by [t']. *) 30 + 31 + val last_is_nl : t -> bool 32 + (** [last_is_nl t] returns [true] if the last string should have a newline. *) 33 + 34 + val equal_to_string_list : t -> string list -> bool 35 + (** [equal_to_string_list t xs] returns [true] if the content of [t] is equal to 36 + the content of [xs]. *)
+12
vendor/opam/patch/test/test.ml
··· 1101 1101 Alcotest.(check string) __LOC__ expected (Option.get actual) 1102 1102 | None, _, _ | _, None, _ | _, _, None -> Alcotest.skip () 1103 1103 1104 + let many_hunks_old = lazy (opt_read "./external/many-hunks.old") 1105 + let many_hunks_new = lazy (opt_read "./external/many-hunks.new") 1106 + let many_hunks_diff = lazy (opt_read "./external/many-hunks.diff") 1107 + let many_hunks_apply () = 1108 + match Lazy.force many_hunks_old, Lazy.force many_hunks_new, Lazy.force many_hunks_diff with 1109 + | Some many_hunks_old, Some expected, Some diff -> 1110 + let patch = Patch.parse ~p:0 diff in 1111 + let actual = Patch.patch ~cleanly:true (Some many_hunks_old) (List.hd patch) in 1112 + Alcotest.(check string) __LOC__ expected (Option.get actual) 1113 + | None, _, _ | _, None, _ | _, _, None -> Alcotest.skip () 1114 + 1104 1115 let big_diff = [ 1105 1116 "parse", `Quick, parse_big; 1106 1117 "print", `Quick, print_big; 1107 1118 "parse own", `Quick, parse_own; 1108 1119 "1_000_000 print", `Quick, one_mil_print; 1109 1120 "1_000_000 apply", `Quick, one_mil_apply; 1121 + "many-hunks apply", `Quick, many_hunks_apply; 1110 1122 ] 1111 1123 1112 1124 let print_diff_mine_empty_their_no_nl () =