this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

improve perf with one weird trick (LRU caching)

+12 -11
+1 -1
src/atmst/cartool.py
···
111 111       "info": (print_info, "print CAR header and repo info"),
112 112       "list": (list_all, "list all records in the CAR (values as CIDs)"),
113 113       "dump": (dump_all, "dump all records in the CAR (values as JSON)"),
114     -     "dump_record": (dump_record, "dump a single record keyed on ('collection/rkey')"),
    114 +     "dump_record": (dump_record, "dump a single record, keyed on ('collection/rkey')"),
115 115       "compact": (compact, "rewrite the whole CAR, dropping any duplicated or unreferenced blocks"),
116 116   }
117 117
+2 -3
src/atmst/mst/node.py
···
  8   8   from dataclasses import dataclass
  9   9   from typing import Tuple, Self, Optional
 10  10
     11 + from ..util import hash_to_cid
 11  12
 12  13   @dataclass(frozen=True) # frozen == immutable == win
 13  14   class MSTNode:
···
 55  56       # since we're immutable, this can be cached
 56  57       @cached_property
 57  58       def cid(self) -> CID:
 58     -         digest = multihash.digest(self.serialised, "sha2-256")
 59     -         cid = CID("base32", 1, "dag-cbor", digest)
 60     -         return cid
     59 +         return hash_to_cid(self.serialised)
 61  60
 62  61       # likewise
 63  62       @cached_property
+4 -6
src/atmst/mst/node_store.py
···
  1   1   from typing import Optional, Dict
      2 + from functools import lru_cache
  2   3
  3   4   from multiformats import CID
      5 + from lru import LRU
  4   6
  5   7   from ..blockstore import BlockStore
  6   8   from ..util import indent
···
 12  14       for loading and storing MSTNodes
 13  15       """
 14  16       bs: BlockStore
 15     -     cache: Dict[Optional[CID], MSTNode] # XXX: this cache will grow forever!
 16     -     #cache_counts: Dict[Optional[CID], int]
     17 +     cache: Dict[Optional[CID], MSTNode]
 17  18
 18  19       def __init__(self, bs: BlockStore) -> None:
 19  20           self.bs = bs
 20     -         self.cache = {}
 21     -         #self.cache_counts = {}
     21 +         self.cache = LRU(1024)
 22  22
 23     -     # TODO: LRU cache this - this package looks ideal: https://github.com/amitdev/lru-dict
 24  23       def get_node(self, cid: Optional[CID]) -> MSTNode:
 25  24           cached = self.cache.get(cid)
 26  25           if cached:
···
 35  34           self.cache[cid] = res
 36  35           return res
 37  36
 38     -     # TODO: also put in cache
 39  37       def put_node(self, node: MSTNode) -> MSTNode:
 40  38           self.cache[node.cid] = node
 41  39           self.bs.put_block(bytes(node.cid), node.serialised)
+4
src/atmst/util.py
···
      1 + #import hashlib
  1   2   from multiformats import multihash, CID
      3 + from functools import lru_cache
  2   4
  3   5   def indent(msg: str) -> str:
  4   6       ISTR = " "
  5   7       return ISTR + msg.replace("\n", "\n"+ISTR)
  6   8
      9 + @lru_cache(maxsize=1024) # unreasonably effective, lol
  7  10   def hash_to_cid(data: bytes, codec="dag-cbor") -> CID:
     11 +     #digest = b"\x12\x20" + hashlib.sha256(data).digest()
  8  12       digest = multihash.digest(data, "sha2-256")
  9  13       return CID("base32", 1, codec, digest)
+1 -1
tests/test_mst_diff.py
···
 41  41           self.assertEqual(deleted, reference_deleted)
 42  42
 43  43   if __name__ == '__main__':
 44     -     unittest.main()
     44 +     unittest.main(module="tests.test_mst_diff")