this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

create cartool

+143 -37
+1 -1
README.md
··· 14 14 python3 -m pip install . 15 15 ``` 16 16 17 - dev install: 17 + dev install: (editable) 18 18 19 19 ``` 20 20 python3 -m pip install -e .
+3
pyproject.toml
··· 34 34 [project.urls] 35 35 Homepage = "https://github.com/DavidBuchanan314/atmst" 36 36 Issues = "https://github.com/DavidBuchanan314/atmst/issues" 37 + 38 + [project.scripts] 39 + cartool = "atmst.cartool:main"
+13 -16
src/atmst/blockstore/car_reader.py
··· 1 1 from typing import Dict, List, Tuple, BinaryIO 2 + import hashlib 3 + 2 4 from multiformats import varint, CID 3 5 import dag_cbor 4 6 ··· 14 16 car_roots: List[CID] 15 17 block_offsets: Dict[bytes, Tuple[int, int]] # CID -> (offset, length) 16 18 17 - def __init__(self, file: BinaryIO) -> None: 19 + def __init__(self, file: BinaryIO, validate_hashes: bool=True) -> None: 18 20 """ 19 21 pre-scan over the whole file, recording the offsets of each block 20 22 """ 21 23 22 24 self.file = file 25 + self.validate_hashes = validate_hashes 23 26 file.seek(0) 24 27 25 28 # parse out CAR header ··· 30 33 header_obj = dag_cbor.decode(header) 31 34 if header_obj.get("version") != 1: 32 35 raise ValueError(f"unsupported CAR version ({header_obj.get('version')})") 33 - self.car_roots = header_obj["roots"] 36 + if len(header_obj["roots"]) != 1: 37 + raise ValueError(f"unsupported number of CAR roots ({len(header_obj['roots'])}, expected 1)") 38 + self.car_root = header_obj["roots"][0] 34 39 35 40 # scan through the CAR to find block offsets 36 41 self.block_offsets = {} ··· 56 61 value = self.file.read(length) 57 62 if len(value) != length: 58 63 raise EOFError() 64 + if self.validate_hashes: 65 + if key[:4] != b"\x01\x71\x12\x20": 66 + raise ValueError("unsupported CID type") 67 + digest = hashlib.sha256(value).digest() 68 + if digest != key[4:]: 69 + raise ValueError("bad CID hash!") 59 70 return value 60 71 61 72 def del_block(self, key: bytes) -> None: 62 73 raise NotImplementedError("ReadOnlyCARBlockStore does not support delete()") 63 - 64 - 65 - """ 66 - if __name__ == "__main__": 67 - f = open("/home/david/programming/python/bskyclient/retr0id.car", "rb") 68 - bs = ReadOnlyCARBlockStore(f) 69 - commit_obj = dag_cbor.decode(bs.get_block(bytes(bs.car_roots[0]))) 70 - print(commit_obj) 71 - mst_root: CID = commit_obj["data"] 72 - 73 - from ..mst import NodeStore 74 - ns = NodeStore(bs) 75 - print(ns.get_node(mst_root)) 76 - """
+101
src/atmst/cartool.py
"""cartool: a small command-line utility for inspecting ATProto repo CAR files.

Each command takes the path to a CAR file as its first argument; see
``COMMANDS`` (or run with no arguments) for the list of commands.
"""

import base64
import inspect
import json
import os
import sys
from contextlib import contextmanager

import dag_cbor
from multiformats import CID

from .blockstore.car_reader import ReadOnlyCARBlockStore
from .mst.node_store import NodeStore
from .mst.node_walker import NodeWalker


class ATJsonEncoder(json.JSONEncoder):
    """JSON encoder that can serialise the non-JSON types found in records.

    ``bytes`` become ``{"$bytes": <base64>}`` and ``CID`` values become
    ``{"$link": <base32 CID>}``; anything else is deferred to the base class
    (which raises ``TypeError`` for unsupported types).
    """

    def default(self, o):
        if isinstance(o, bytes):
            return {"$bytes": base64.b64encode(o).decode()}
        if isinstance(o, CID):
            return {"$link": o.encode("base32")}
        return super().default(o)


def prettify_record(record) -> str:
    """Return *record* as indented JSON text, encoded with ATJsonEncoder."""
    return json.dumps(record, indent=" ", cls=ATJsonEncoder)


@contextmanager
def _open_repo(car_path: str):
    """Open *car_path* and yield ``(blockstore, walker)`` for its repo.

    The walker is positioned at the MST root named by the commit block that
    the CAR root points to. The underlying file is closed when the ``with``
    block exits, so the yielded objects must not be used afterwards.
    """
    with open(car_path, "rb") as carfile:
        bs = ReadOnlyCARBlockStore(carfile)
        commit = dag_cbor.decode(bs.get_block(bytes(bs.car_root)))
        yield bs, NodeWalker(NodeStore(bs), commit["data"])


def print_info(car_path: str) -> None:
    """Print CAR-level details and the fields of the repo commit block."""
    print(f"Reading {car_path!r}")
    print(f"Size on disk: {os.stat(car_path).st_size} bytes")
    with open(car_path, "rb") as carfile:
        bs = ReadOnlyCARBlockStore(carfile)
        print("Total CAR blocks:", len(bs.block_offsets))
        print("Root CID:", bs.car_root.encode("base32"))
        commit = dag_cbor.decode(bs.get_block(bytes(bs.car_root)))
        print()
        print("ATProto commit info:")
        print("Version:", commit["version"])
        if commit["version"] != 3:
            # Diagnostics go to stderr, matching dump_record's error reporting.
            print(
                "Error: only v3 repo format is supported. Got:",
                commit["version"],
                file=sys.stderr,
            )
            return
        print("Repo:", commit["did"])
        print("Rev:", commit["rev"])
        print("Sig:", base64.urlsafe_b64encode(commit["sig"]).decode())
        print("MST root:", commit["data"].encode("base32"))


def print_all_records(car_path: str, to_json: bool) -> None:
    """Print every key in the repo, in the order the walker yields them.

    With *to_json* true each value block is decoded and printed as JSON;
    otherwise only the value's CID (base32) is printed.
    """
    with _open_repo(car_path) as (bs, nw):
        for k, v in nw.iter_kv():
            if to_json:
                record = dag_cbor.decode(bs.get_block(bytes(v)))
                print(f"{k} -> {prettify_record(record)}")
            else:
                print(f"{k} -> {v.encode('base32')}")


def list_all(car_path: str):
    """List all record keys with their value CIDs."""
    print_all_records(car_path, to_json=False)


def dump_all(car_path: str):
    """Dump all records with their values rendered as JSON."""
    # Allow printing very deeply nested records.
    # NOTE(review): the Python docs warn that a too-high recursion limit can
    # crash the interpreter instead of raising RecursionError; a hostile CAR
    # file with extreme nesting could trigger that.
    sys.setrecursionlimit(99999999)
    print_all_records(car_path, to_json=True)


def dump_record(car_path: str, key: str):
    """Print the single record stored under *key* as JSON.

    Exits the process with status -1 (after a message on stderr) when the
    key is not present.
    """
    with _open_repo(car_path) as (bs, nw):
        val = nw.find_value(key)
        if val is None:
            print("Record not found!", file=sys.stderr)
            sys.exit(-1)
        record = dag_cbor.decode(bs.get_block(bytes(val)))
        print(prettify_record(record))


# command name -> (handler, one-line help text)
COMMANDS = {
    "info": (print_info, "print CAR header and repo info"),
    "list": (list_all, "list all records in the CAR (values as CIDs)"),
    "dump": (dump_all, "dump all records in the CAR (values as JSON)"),
    "dump_record": (dump_record, "dump a single record keyed on ('collection/rkey')"),
}


def print_help(file=None):
    """Print usage, deriving each command's argument names from its handler.

    *file* is passed through to ``print`` (``None`` means stdout).
    """
    print("USAGE: cartool COMMAND [args...]", file=file)
    print("", file=file)
    print("Available commands:", file=file)
    for cmdname, (cmdfn, helptext) in COMMANDS.items():
        args = [f"<{arg}>" for arg in inspect.signature(cmdfn).parameters]
        print(f"{cmdname} {' '.join(args)} : {helptext}", file=file)


def main():
    """Entry point: dispatch ``sys.argv`` to the matching command handler.

    With no arguments, prints help and returns. An unknown command or a wrong
    number of arguments prints a message plus help on stderr and exits with
    status 2, instead of surfacing a KeyError/TypeError traceback.
    """
    if len(sys.argv) < 2:
        print_help()
        return

    command, *args = sys.argv[1:]
    entry = COMMANDS.get(command)
    if entry is None:
        print(f"Unknown command: {command!r}", file=sys.stderr)
        print_help(file=sys.stderr)
        sys.exit(2)

    cmdfn = entry[0]
    # Validate the argument count up front so that a TypeError raised
    # *inside* a command is not mistaken for a usage error.
    try:
        inspect.signature(cmdfn).bind(*args)
    except TypeError:
        print(f"Wrong number of arguments for {command!r}", file=sys.stderr)
        print_help(file=sys.stderr)
        sys.exit(2)

    cmdfn(*args)


if __name__ == "__main__":
    main()
+25 -20
src/atmst/mst/node_walker.py
··· 1 1 from dataclasses import dataclass 2 - from typing import Tuple, Self, Optional, List 2 + from typing import Tuple, Self, Optional, List, Iterable 3 3 4 4 from multiformats import CID 5 5 ··· 108 108 return self.lkey, self.lval # the kv pair we just jumped over 109 109 110 110 # iterate over every k/v pair in key-sorted order 111 - def iter_kv(self): 111 + def iter_kv(self) -> Iterable[Tuple[str, CID]]: 112 112 while not self.is_final: 113 113 yield self.next_kv() 114 114 ··· 121 121 yield self.frame.node.cid 122 122 self.right() 123 123 124 + # start inclusive 125 + def iter_kv_range(self, start: str, end: str, end_inclusive: bool=False) -> Iterable[Tuple[str, CID]]: 126 + while True: 127 + while self.rkey < start: 128 + self.right() 129 + if not self.subtree: 130 + break 131 + self.down() 124 132 125 - def enumerate_mst(ns: NodeStore, root_cid: CID): 126 - for k, v in NodeWalker(ns, root_cid).iter_kv(): 127 - print(k, "->", v.encode("base32")) 128 - 129 - # start inclusive, end exclusive 130 - def enumerate_mst_range(ns: NodeStore, root_cid: CID, start: str, end: str): 131 - cur = NodeWalker(ns, root_cid) 132 - while True: 133 - while cur.rkey < start: 134 - cur.right() 135 - if not cur.subtree: 136 - break 137 - cur.down() 138 - 139 - for k, v, in cur.iter_kv(): 140 - if k >= end: 141 - break 142 - print(k, "->", v.encode("base32")) 133 + for k, v, in self.iter_kv(): 134 + if k > end or (not end_inclusive and k == end): 135 + break 136 + yield k, v 137 + 138 + def find_value(self, key: str) -> Optional[CID]: 139 + while True: 140 + while self.rkey < key: 141 + self.right() 142 + if not self.subtree: 143 + break 144 + self.down() 145 + if self.rkey != key: 146 + return None 147 + return self.rval