···44import json
5566import dag_cbor
77-from multiformats import CID
77+from multiformats import CID, varint
8899-from .blockstore.car_reader import ReadOnlyCARBlockStore
99+from .blockstore.car_file import ReadOnlyCARBlockStore
1010from .mst.node_store import NodeStore
1111from .mst.node_walker import NodeWalker
1212···7474 record = dag_cbor.decode(bs.get_block(bytes(val)))
7575 print(prettify_record(record))
def write_block(file, data):
    """Write *data* to *file*, preceded by its length encoded as a varint.

    Args:
        file: Writable binary file object.
        data: The bytes to emit after the length prefix.
    """
    length_prefix = varint.encode(len(data))
    file.write(length_prefix)
    file.write(data)
def compact(car_in: str, car_out: str):
    """Rewrite the CAR file at *car_in* into a new CAR file at *car_out*.

    The output consists of a version-1 header listing the input's root, then
    the root (commit) block, then every MST node yielded by a NodeWalker
    started at ``commit["data"]``, each followed by the blocks that the
    node's ``vals`` refer to.  Each CID is written at most once, and blocks
    that this walk never reaches are not copied.

    Args:
        car_in: Path of the CAR file to read.
        car_out: Path of the CAR file to create (truncated if it exists).

    Raises:
        ValueError: If *car_in* and *car_out* refer to the same file.
    """
    import os  # local import: only needed for the same-file guard below

    # Opening the output with "wb" truncates it immediately, so compacting a
    # file onto itself would wipe the input before anything is read from it.
    if os.path.exists(car_out) and os.path.samefile(car_in, car_out):
        raise ValueError("car_in and car_out must be different files")

    with open(car_in, "rb") as carfile_in, open(car_out, "wb") as carfile_out:
        bs = ReadOnlyCARBlockStore(carfile_in)

        new_header = dag_cbor.encode({
            "version": 1,
            "roots": [bs.car_root],
        })
        write_block(carfile_out, new_header)

        # The root block is decoded only to find where the MST walk starts;
        # the original bytes are copied through unchanged.
        commit_blob = bs.get_block(bytes(bs.car_root))
        commit = dag_cbor.decode(commit_blob)
        write_block(carfile_out, bytes(bs.car_root) + commit_blob)

        # CIDs that have already been written to the output.
        dedup = {bs.car_root}

        ns = NodeStore(bs)
        nw = NodeWalker(ns, commit["data"])

        for node in nw.iter_nodes():
            if node.cid not in dedup:
                write_block(carfile_out, bytes(node.cid) + node.serialised)
                dedup.add(node.cid)
            # A node's values are checked even when the node itself was
            # already written, exactly as each value is checked individually.
            for v in node.vals:
                if v not in dedup:
                    write_block(carfile_out, bytes(v) + bs.get_block(bytes(v)))
                    dedup.add(v)
# Command table: maps each command name to a (handler, help text) pair.
COMMANDS = {
    "info": (
        print_info,
        "print CAR header and repo info",
    ),
    "list": (
        list_all,
        "list all records in the CAR (values as CIDs)",
    ),
    "dump": (
        dump_all,
        "dump all records in the CAR (values as JSON)",
    ),
    "dump_record": (
        dump_record,
        "dump a single record keyed on ('collection/rkey')",
    ),
    "compact": (
        compact,
        "rewrite the whole CAR, dropping any duplicated or unreferenced blocks",
    ),
}
8311784118def print_help():
···86120 print("")
87121 print("Available commands:")
88122 for cmdname, (cmdfn, helptext) in COMMANDS.items():
8989- args = [f"<{arg}>" for arg in cmdfn.__code__.co_varnames[:cmdfn.__code__.co_argcount]]
123123+ fn_args = cmdfn.__code__.co_varnames[:cmdfn.__code__.co_argcount]
124124+ args = [f"<{arg}>" for arg in fn_args]
90125 print(f"{cmdname} {' '.join(args)} : {helptext}")
9112692127def main():
+7-3
src/atmst/mst/node_walker.py
···113113 yield self.next_kv()
114114115115 # get all mst nodes down and to the right of the current position
def iter_nodes(self) -> Iterable[MSTNode]:
    """Yield the node of the current frame, then every node visited onward.

    The walker itself is advanced (via ``down()`` and ``right()``) while the
    generator is consumed, so iterating changes the walker's position.
    Iteration stops once ``is_final`` is true.
    """
    yield self.frame.node
    while not self.is_final:
        while self.subtree:  # recurse down every subtree
            self.down()
            # each descent lands on a new frame; emit its node
            yield self.frame.node
        # no subtree at this position: step to the right
        self.right()
def iter_node_cids(self) -> Iterable[CID]:
    """Yield the ``cid`` of each node produced by ``iter_nodes()``."""
    yield from (mst_node.cid for mst_node in self.iter_nodes())
123127124128 # start inclusive
125129 def iter_kv_range(self, start: str, end: str, end_inclusive: bool=False) -> Iterable[Tuple[str, CID]]: