this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

NodeStore: pre-initialise known cid/serialised values

+37 -36
+13 -14
src/atmst/cartool.py
··· 2 2 import os 3 3 import base64 4 4 import json 5 - from typing import Tuple, Any 5 + from typing import Tuple, Any, BinaryIO 6 6 7 7 import dag_cbor 8 8 from multiformats import CID, varint ··· 25 25 def prettify_record(record) -> str: 26 26 return json.dumps(record, indent=" ", cls=ATJsonEncoder) 27 27 28 - def open_car(car_path: str) -> Tuple[ReadOnlyCARBlockStore, Any, NodeWalker]: 28 + def open_car(car_path: str) -> Tuple[ReadOnlyCARBlockStore, dict]: 29 29 carfile = open(car_path, "rb") 30 30 bs = ReadOnlyCARBlockStore(carfile) 31 31 commit = dag_cbor.decode(bs.get_block(bytes(bs.car_root))) 32 - nw = NodeWalker(NodeStore(bs), commit["data"]) 33 - return bs, commit, nw 32 + return bs, commit 34 33 35 34 def print_info(car_path: str) -> None: 36 35 print(f"Reading {car_path!r}") 37 36 print(f"Size on disk: {os.stat(car_path).st_size} bytes") 38 - bs, commit, _ = open_car(car_path) 37 + bs, commit = open_car(car_path) 39 38 print("Total CAR blocks:", len(bs.block_offsets)) 40 39 print("Root CID:", bs.car_root.encode("base32")) 41 40 print() ··· 50 49 print("MST root:", commit["data"].encode("base32")) 51 50 52 51 def print_all_records(car_path: str, to_json: bool) -> None: 53 - bs, _, nw = open_car(car_path) 54 - for k, v in nw.iter_kv(): 52 + bs, commit = open_car(car_path) 53 + for k, v in NodeWalker(NodeStore(bs), commit["data"]).iter_kv(): 55 54 if to_json: 56 55 record = dag_cbor.decode(bs.get_block(bytes(v))) 57 56 print(f"{json.dumps(k)} -> {prettify_record(record)}") ··· 66 65 print_all_records(car_path, to_json=True) 67 66 68 67 def dump_record(car_path: str, key: str): 69 - bs, _, nw = open_car(car_path) 70 - val = nw.find_value(key) 68 + bs, commit = open_car(car_path) 69 + val = NodeWalker(NodeStore(bs), commit["data"]).find_value(key) 71 70 if val is None: 72 71 print("Record not found!", file=sys.stderr) 73 72 sys.exit(-1) 74 73 record = dag_cbor.decode(bs.get_block(bytes(val))) 75 74 print(prettify_record(record)) 76 75 77 - def write_block(file, data): 76 + def write_block(file: BinaryIO, data: bytes) -> None: 78 77 file.write(varint.encode(len(data))) 79 78 file.write(data) 80 79 81 80 def compact(car_in: str, car_out: str): 82 - bs, commit, nw = open_car(car_in) 81 + bs, commit = open_car(car_in) 83 82 with open(car_out, "wb") as carfile_out: 84 83 new_header = dag_cbor.encode({ 85 84 "version": 1, ··· 89 88 write_block(carfile_out, bytes(bs.car_root) + dag_cbor.encode(commit)) 90 89 dedup = {bs.car_root} 91 90 92 - for node in nw.iter_nodes(): 91 + for node in NodeWalker(NodeStore(bs), commit["data"]).iter_nodes(): 93 92 if node.cid not in dedup: 94 93 write_block(carfile_out, bytes(node.cid) + node.serialised) 95 94 dedup.add(node.cid) ··· 104 103 return f"{a} -> {b}" 105 104 106 105 def print_record_diff(car_a: str, car_b: str): 107 - bs_a, commit_a, _ = open_car(car_a) 108 - bs_b, commit_b, _ = open_car(car_b) 106 + bs_a, commit_a = open_car(car_a) 107 + bs_b, commit_b = open_car(car_b) 109 108 print(f"Repo: {_delta_str(commit_a['did'], commit_b['did'])}") 110 109 print(f"Revision: {_delta_str(commit_a['rev'], commit_b['rev'])}") 111 110 print(f"Commit: {_delta_str(bs_a.car_root.encode('base32'), bs_b.car_root.encode('base32'))}")
+15 -8
src/atmst/mst/node_store.py
··· 21 21 self.cache = LRU(1024) 22 22 23 23 def get_node(self, cid: Optional[CID]) -> MSTNode: 24 - cached = self.cache.get(cid) 25 - if cached: 24 + if cached := self.cache.get(cid): # look in our LRU cache first 26 25 return cached 27 26 """ 28 27 if cid is None, returns an empty MST node 29 28 """ 30 29 if cid is None: 31 - return self.put_node(MSTNode.empty_root()) 30 + return self.stored_node(MSTNode.empty_root()) 32 31 33 - res = MSTNode.deserialise(self.bs.get_block(bytes(cid))) 34 - self.cache[cid] = res 35 - return res 32 + node_bytes = self.bs.get_block(bytes(cid)) 33 + node = MSTNode.deserialise(node_bytes) 34 + 35 + # prime the cached_properties since we already know their values 36 + object.__setattr__(node, "serialised", node_bytes) 37 + object.__setattr__(node, "cid", cid) 38 + 39 + # prime the node cache 40 + self.cache[cid] = node 41 + 42 + return node 36 43 37 - def put_node(self, node: MSTNode) -> MSTNode: 38 - self.cache[node.cid] = node 44 + def stored_node(self, node: MSTNode) -> MSTNode: 45 + self.cache[node.cid] = node # also put it in the LRU cache 39 46 self.bs.put_block(bytes(node.cid), node.serialised) 40 47 return node # this is convenient 41 48
+9 -9
src/atmst/mst/node_wrangler.py
··· 56 56 if i < len(node.keys) and node.keys[i] == key: 57 57 if node.vals[i] == val: 58 58 return node # we can return our old self if there is no change 59 - return self.ns.put_node(MSTNode( 59 + return self.ns.stored_node(MSTNode( 60 60 keys=node.keys, 61 61 vals=tuple_replace_at(node.vals, i, val), 62 62 subtrees=node.subtrees 63 63 )) 64 64 65 - return self.ns.put_node(MSTNode( 65 + return self.ns.stored_node(MSTNode( 66 66 keys=tuple_insert_at(node.keys, i, key), 67 67 vals=tuple_insert_at(node.vals, i, val), 68 68 subtrees = node.subtrees[:i] + \ ··· 72 72 73 73 def _put_recursive(self, node: MSTNode, key: str, val: CID, key_height: int, tree_height: int) -> MSTNode: 74 74 if key_height > tree_height: # we need to grow the tree 75 - return self.ns.put_node(self._put_recursive( 75 + return self.ns.stored_node(self._put_recursive( 76 76 MSTNode.empty_root(), 77 77 key, val, key_height, tree_height + 1 78 78 )) 79 79 80 80 if key_height < tree_height: # we need to look below 81 81 i = node.gte_index(key) 82 - return self.ns.put_node(MSTNode( 82 + return self.ns.stored_node(MSTNode( 83 83 keys=node.keys, 84 84 vals=node.vals, 85 85 subtrees=tuple_replace_at( ··· 101 101 node = self.ns.get_node(node_cid) 102 102 i = node.gte_index(key) 103 103 lsub, rsub = self._split_on_key(node.subtrees[i], key) 104 - return self.ns.put_node(MSTNode( 104 + return self.ns.stored_node(MSTNode( 105 105 keys=node.keys[:i], 106 106 vals=node.vals[:i], 107 107 subtrees=node.subtrees[:i] + (lsub,) 108 - ))._to_optional(), self.ns.put_node(MSTNode( 108 + ))._to_optional(), self.ns.stored_node(MSTNode( 109 109 keys=node.keys[i:], 110 110 vals=node.vals[i:], 111 111 subtrees=(rsub,) + node.subtrees[i + 1:], ··· 130 130 if key_height < tree_height: # the key must be deleted from a subtree 131 131 if node.subtrees[i] is None: 132 132 return node._to_optional() # the key cannot be in this subtree, no change needed 133 - return self.ns.put_node(MSTNode( 133 + return self.ns.stored_node(MSTNode( 134 134 keys=node.keys, 135 135 vals=node.vals, 136 136 subtrees=tuple_replace_at( ··· 146 146 147 147 assert(node.keys[i] == key) # sanity check (should always be true) 148 148 149 - return self.ns.put_node(MSTNode( 149 + return self.ns.stored_node(MSTNode( 150 150 keys=tuple_remove_at(node.keys, i), 151 151 vals=tuple_remove_at(node.vals, i), 152 152 subtrees=node.subtrees[:i] + ( ··· 161 161 return left_cid 162 162 left = self.ns.get_node(left_cid) 163 163 right = self.ns.get_node(right_cid) 164 - return self.ns.put_node(MSTNode( 164 + return self.ns.stored_node(MSTNode( 165 165 keys=left.keys + right.keys, 166 166 vals=left.vals + right.vals, 167 167 subtrees=left.subtrees[:-1] + (
-5
src/atmst/util.py
··· 1 1 #import hashlib 2 2 from multiformats import multihash, CID 3 - from functools import lru_cache 4 3 5 4 def indent(msg: str) -> str: 6 5 ISTR = " " 7 6 return ISTR + msg.replace("\n", "\n"+ISTR) 8 7 9 - @lru_cache(maxsize=64) # unreasonably effective, lol 10 8 def hash_to_cid(data: bytes, codec="dag-cbor") -> CID: 11 - """ 12 - NB: don't use this function with large blobs! They'll take up too much space in the LRU cache. 13 - """ 14 9 #digest = b"\x12\x20" + hashlib.sha256(data).digest() 15 10 digest = multihash.digest(data, "sha2-256") 16 11 return CID("base32", 1, codec, digest)