This repo has no description.
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

add CAR compaction command

+46 -7
+1 -1
src/atmst/all.py
··· 4 4 """ 5 5 6 6 from .blockstore import BlockStore, MemoryBlockStore 7 - from .blockstore.car_reader import ReadOnlyCARBlockStore 7 + from .blockstore.car_file import ReadOnlyCARBlockStore 8 8 from .mst.node_walker import NodeWalker 9 9 from .mst.node_store import NodeStore 10 10 from .mst.node_wrangler import NodeWrangler
src/atmst/blockstore/car_reader.py src/atmst/blockstore/car_file.py
+38 -3
src/atmst/cartool.py
··· 4 4 import json 5 5 6 6 import dag_cbor 7 - from multiformats import CID 7 + from multiformats import CID, varint 8 8 9 - from .blockstore.car_reader import ReadOnlyCARBlockStore 9 + from .blockstore.car_file import ReadOnlyCARBlockStore 10 10 from .mst.node_store import NodeStore 11 11 from .mst.node_walker import NodeWalker 12 12 ··· 74 74 record = dag_cbor.decode(bs.get_block(bytes(val))) 75 75 print(prettify_record(record)) 76 76 77 + def write_block(file, data): 78 + file.write(varint.encode(len(data))) 79 + file.write(data) 80 + 81 + def compact(car_in: str, car_out: str): 82 + with open(car_in, "rb") as carfile_in: 83 + with open(car_out, "wb") as carfile_out: 84 + bs = ReadOnlyCARBlockStore(carfile_in) 85 + 86 + new_header = dag_cbor.encode({ 87 + "version": 1, 88 + "roots": [bs.car_root] 89 + }) 90 + write_block(carfile_out, new_header) 91 + 92 + commit_blob = bs.get_block(bytes(bs.car_root)) 93 + commit = dag_cbor.decode(commit_blob) 94 + 95 + write_block(carfile_out, bytes(bs.car_root) + commit_blob) 96 + dedup = {bs.car_root} 97 + 98 + ns = NodeStore(bs) 99 + nw = NodeWalker(ns, commit["data"]) 100 + 101 + for node in nw.iter_nodes(): 102 + if node.cid not in dedup: 103 + write_block(carfile_out, bytes(node.cid) + node.serialised) 104 + dedup.add(node.cid) 105 + for v in node.vals: 106 + if v not in dedup: 107 + write_block(carfile_out, bytes(v) + bs.get_block(bytes(v))) 108 + dedup.add(v) 109 + 77 110 COMMANDS = { 78 111 "info": (print_info, "print CAR header and repo info"), 79 112 "list": (list_all, "list all records in the CAR (values as CIDs)"), 80 113 "dump": (dump_all, "dump all records in the CAR (values as JSON)"), 81 114 "dump_record": (dump_record, "dump a single record keyed on ('collection/rkey')"), 115 + "compact": (compact, "rewrite the whole CAR, dropping any duplicated or unreferenced blocks"), 82 116 } 83 117 84 118 def print_help(): ··· 86 120 print("") 87 121 print("Available commands:") 88 122 for cmdname, (cmdfn, helptext) in 
COMMANDS.items(): 89 - args = [f"<{arg}>" for arg in cmdfn.__code__.co_varnames[:cmdfn.__code__.co_argcount]] 123 + fn_args = cmdfn.__code__.co_varnames[:cmdfn.__code__.co_argcount] 124 + args = [f"<{arg}>" for arg in fn_args] 90 125 print(f"{cmdname} {' '.join(args)} : {helptext}") 91 126 92 127 def main():
+7 -3
src/atmst/mst/node_walker.py
··· 113 113 yield self.next_kv() 114 114 115 115 # get all mst nodes down and to the right of the current position 116 - def iter_node_cids(self): 117 - yield self.frame.node.cid 116 + def iter_nodes(self) -> Iterable[MSTNode]: 117 + yield self.frame.node 118 118 while not self.is_final: 119 119 while self.subtree: # recurse down every subtree 120 120 self.down() 121 - yield self.frame.node.cid 121 + yield self.frame.node 122 122 self.right() 123 + 124 + def iter_node_cids(self) -> Iterable[CID]: 125 + for node in self.iter_nodes(): 126 + yield node.cid 123 127 124 128 # start inclusive 125 129 def iter_kv_range(self, start: str, end: str, end_inclusive: bool=False) -> Iterable[Tuple[str, CID]]: