···5858 raise EOFError()
5959 return value
60606161- def delete_block(self, key: bytes) -> None:
6161+ def del_block(self, key: bytes) -> None:
6262 raise NotImplementedError("ReadOnlyCARBlockStore does not support delete()")
63636464
+37-41
mst.py
···227227 def __init__(self, ns: NodeStore) -> None:
228228 self.ns = ns
229229230230- def put(self, root_cid: CID, key: str, val: CID) -> CID:
230230+ def put_record(self, root_cid: CID, key: str, val: CID) -> CID:
231231 root = self.ns.get_node(root_cid)
232232 if root.is_empty(): # special case for empty tree
233233 return self._put_here(root, key, val).cid
234234 return self._put_recursive(root, key, val, MSTNode.key_height(key), root.height).cid
235235236236- def delete(self, root_cid: CID, key: str) -> CID:
236236+ def del_record(self, root_cid: CID, key: str) -> CID:
237237 root = self.ns.get_node(root_cid)
238238239239 # Note: the seemingly redundant outer .get().cid is required to transform
···514514 for deleted_key in deleted_kv.keys() - created_kv.keys():
515515 yield ("deleted", deleted_key, deleted_kv[deleted_key].encode("base32")) #XXX: encode() is just for debugging
516516517517+def very_slow_mst_diff(ns, root_a: CID, root_b: CID):
518518+ """
519519+ This should return the same result as mst_diff, but it gets there in a very slow
520520+ yet less error-prone way, so it's useful for testing.
521521+ """
522522+ a_nodes = set(NodeWalker(ns, root_a).iter_node_cids())
523523+ b_nodes = set(NodeWalker(ns, root_b).iter_node_cids())
524524+ return b_nodes - a_nodes, a_nodes - b_nodes
525525+517526EMPTY_NODE_CID = MSTNode.empty_root().cid
518527519528def mst_diff(ns: NodeStore, root_a: CID, root_b: CID) -> Tuple[Set[CID], Set[CID]]: # created, deleted
520520- created, deleted = mst_diff_recursive(NodeWalker(ns, root_a), NodeWalker(ns, root_b))
529529+ created = set() # MST nodes in b but not in a
530530+ deleted = set() # MST nodes in a but not in b
531531+ mst_diff_recursive(created, deleted, NodeWalker(ns, root_a), NodeWalker(ns, root_b))
521532 middle = created & deleted # my algorithm has occasional false-positives
522533 #assert(not middle) # this fails
523534 #print("middle", len(middle))
···530541 created.add(EMPTY_NODE_CID)
531542 return created, deleted
532543533533-def very_slow_mst_diff(ns, root_a: CID, root_b: CID):
534534- """
535535- This should return the same result as mst_diff, but it gets there in a very slow
536536- yet less error-prone way, so it's useful for testing.
537537- """
538538- a_nodes = set(NodeWalker(ns, root_a).iter_node_cids())
539539- b_nodes = set(NodeWalker(ns, root_b).iter_node_cids())
540540- return b_nodes - a_nodes, a_nodes - b_nodes
541541-542542-def mst_diff_recursive(a: NodeWalker, b: NodeWalker) -> Tuple[Set[CID], Set[CID]]: # created, deleted
543543- mst_created = set() # MST nodes in b but not in a
544544- mst_deleted = set() # MST nodes in a but not in b
545545-544544+def mst_diff_recursive(created: Set[CID], deleted: Set[CID], a: NodeWalker, b: NodeWalker): # created, deleted
546545 # the easiest of all cases
547546 if a.frame.node.cid == b.frame.node.cid:
548548- return mst_created, mst_deleted # no difference
547547+ return # no difference
549548550549 # trivial
551550 if a.frame.node.is_empty():
552552- mst_created |= set(b.iter_node_cids())
553553- return mst_created, mst_deleted
551551+ #mst_deleted.add(a.frame.node.cid) # this doesn't work because it might've been a null subtree node
552552+ created |= set(b.iter_node_cids())
553553+ return
554554555555 # likewise
556556 if b.frame.node.is_empty():
557557- mst_deleted |= set(a.iter_node_cids())
558558- return mst_created, mst_deleted
557557+ #mst_created.add(b.frame.node.cid)
558558+ deleted |= set(a.iter_node_cids())
559559+ return
559560560561 # now we're onto the hard part
561562···571572 """
572573573574 # NB: these will end up as false-positives if one tree is a subtree of the other
574574- mst_created.add(b.frame.node.cid)
575575- mst_deleted.add(a.frame.node.cid)
575575+ created.add(b.frame.node.cid)
576576+ deleted.add(a.frame.node.cid)
576577577578 while True:
578579 while a.rkey != b.rkey: # we need a loop because they might "leapfrog" each other
···580581 while a.rkey < b.rkey and not a.is_final:
581582 if a.subtree: # recurse down every subtree
582583 a.down()
583583- mst_deleted.add(a.frame.node.cid)
584584+ deleted.add(a.frame.node.cid)
584585 else:
585586 a.right()
586587···588589 while b.rkey < a.rkey and not b.is_final:
589590 if b.subtree: # recurse down every subtree
590591 b.down()
591591- mst_created.add(b.frame.node.cid)
592592+ created.add(b.frame.node.cid)
592593 else:
593594 b.right()
594595595595- #print(a.rkey, a.stack[0].rkey, b.rkey, a.stack[0].rkey)
596596- #assert(b.rkey == a.rkey)
597597- # the rkeys match, but the subrees below us might not
596596+ # the rkeys now match, but the subtrees below us might not
598597599599- c, d = mst_diff_recursive(a.subtree_walker(), b.subtree_walker())
600600- mst_created |= c
601601- mst_deleted |= d
598598+ mst_diff_recursive(created, deleted, a.subtree_walker(), b.subtree_walker())
602599603600 # check if we can still go right XXX: do we need to care about the case where one can, but the other can't?
604601 # To consider: maybe if I just step a, b will catch up automagically
···607604608605 a.right()
609606 b.right()
610610-611611- return mst_created, mst_deleted
607607+612608613609if __name__ == "__main__":
614610 if 0:
···625621 #enumerate_mst(ns, mst_root)
626622 enumerate_mst_range(ns, mst_root, "app.bsky.feed.generator/", "app.bsky.feed.generator/\xff")
627623628628- root2 = wrangler.delete(mst_root, "app.bsky.feed.generator/alttext")
629629- root2 = wrangler.delete(root2, "app.bsky.feed.like/3kas3fyvkti22")
630630- root2 = wrangler.put(root2, "app.bsky.feed.like/3kc3brpic2z2p", hash_to_cid(b"blah"))
624624+ root2 = wrangler.del_record(mst_root, "app.bsky.feed.generator/alttext")
625625+ root2 = wrangler.del_record(root2, "app.bsky.feed.like/3kas3fyvkti22")
626626+ root2 = wrangler.put_record(root2, "app.bsky.feed.like/3kc3brpic2z2p", hash_to_cid(b"blah"))
631627632628 c, d = mst_diff(ns, mst_root, root2)
633629 print("CREATED:")
···649645 wrangler = NodeWrangler(ns)
650646 root = ns.get_node(None).cid
651647 print(ns.pretty(root))
652652- root = wrangler.put(root, "hello", hash_to_cid(b"blah"))
648648+ root = wrangler.put_record(root, "hello", hash_to_cid(b"blah"))
653649 print(ns.pretty(root))
654654- root = wrangler.put(root, "foo", hash_to_cid(b"bar"))
650650+ root = wrangler.put_record(root, "foo", hash_to_cid(b"bar"))
655651 print(ns.pretty(root))
656652 root_a = root
657657- root = wrangler.put(root, "bar", hash_to_cid(b"bat"))
658658- root = wrangler.put(root, "xyzz", hash_to_cid(b"bat"))
659659- root = wrangler.delete(root, "foo")
653653+ root = wrangler.put_record(root, "bar", hash_to_cid(b"bat"))
654654+ root = wrangler.put_record(root, "xyzz", hash_to_cid(b"bat"))
655655+ root = wrangler.del_record(root, "foo")
660656 print("=============")
661657 print(ns.pretty(root_a))
662658 print("=============")
+5-5
mst_test.py
···1313 for _ in range(10240 if PERF_BENCH else random.randrange(0, 32)):
1414 k = random.randbytes(8).hex()
1515 keys.append(k)
1616- root = nw.put(root, k, hash_to_cid(random.randbytes(8)))
1616+ root = nw.put_record(root, k, hash_to_cid(random.randbytes(8)))
1717 root_a = root
1818 for _ in range(8 if PERF_BENCH else random.randrange(0, 8)):
1919 # some random additions
2020- root = nw.put(root, random.randbytes(8).hex(), hash_to_cid(random.randbytes(8)))
2020+ root = nw.put_record(root, random.randbytes(8).hex(), hash_to_cid(random.randbytes(8)))
2121 if keys:
2222 # some random modifications
2323 for _ in range(4 if PERF_BENCH else random.randrange(0, 4)):
2424 for k in random.choice(keys):
2525- root = nw.put(root, k, hash_to_cid(random.randbytes(8)))
2525+ root = nw.put_record(root, k, hash_to_cid(random.randbytes(8)))
2626 # some random deletions
2727 for _ in range(4 if PERF_BENCH else random.randrange(0, 4)):
2828 for k in random.choice(keys):
2929- root = nw.delete(root, k)
2929+ root = nw.del_record(root, k)
30303131 diff_start = time.time()
3232 c, d = mst_diff(ns, root_a, root)
···42424343if __name__ == "__main__":
4444 duration = 0
4545- for _ in range(1 if PERF_BENCH else 200):
4545+ for _ in range(1 if PERF_BENCH else 20000):
4646 duration += random_test()
4747 print("time spent diffing (ms):", duration*1000)