···6868 :ok
6969 end
@doc """
Lists the tables stored in `manifest` for `level` that are visible at `epoch`
and may overlap the key range starting at `start_key` and ending before `end_key`.

Both `start_key` and `end_key` are `{key, version}` pairs. A table is only
collected when its start key is strictly below `end_key`.

Returns a list of `{:table, block_index, block_checksum, start_key, end_key}`
tuples in the order the manifest yields them during the forward scan.
"""
@spec list_overlapping_tables(
        t,
        non_neg_integer,
        non_neg_integer,
        {binary, non_neg_integer},
        {binary, non_neg_integer}
      ) :: [tuple]
def list_overlapping_tables(manifest, epoch, level, start_key, end_key) do
  {sk_key, sk_ver} = start_key

  # Scan backwards to find the first table visible at `epoch` which lies
  # outside of the range (below it). Probing at `epoch + 1` makes an entry
  # stored at exactly `{level, sk_key, sk_ver, epoch}` part of the backward scan.
  probe_key = {level, sk_key, sk_ver, epoch + 1}

  prev_key =
    case find_prev_table_key(manifest, epoch, level, start_key, probe_key, false) do
      {:ok, key} -> key
      # If we hit the start of the level, we use a sentinel at epoch -1
      :error -> {level, "", 0, -1}
    end

  # Start the forward scan at the `prev_key` found above, which guarantees
  # that any overlapping tables will be found by `next()`. The scan prepends
  # to its accumulator, so the result is reversed once at the end.
  manifest
  |> scan_tables(epoch, level, end_key, prev_key, [])
  |> Enum.reverse()
end
# Scan backwards from `prev_key` to find the key of the first table *below*
# (disjoint from) `start_key` that is visible at `epoch`.
# This key can then be used to start a forward scan that accumulates the
# overlapping tables.
#
# Manifest keys have the shape `{level, key, version, epoch}`. A value is either
# `:tombstone` or `{end_key, end_version, block_index, block_checksum, id}`.
#
# `prev_tombstone?` is true when the entry visited on the previous step (i.e. the
# next key in the keyspace) was a visible tombstone.
#
# Returns `{:ok, key}` for the first disjoint table found, or `:error` when the
# scan runs off the start of the level or of the ETS table.
#
# TODO: there is absolutely no way this is correct right now
defp find_prev_table_key(manifest, epoch, level, start_key, prev_key, prev_tombstone?) do
  case :ets.prev_lookup(manifest, prev_key) do
    {{^level, _k, _ver, _ep} = key, _obj} when prev_tombstone? ->
      # The previous key we saw (i.e. the next key in the keyspace) was a tombstone,
      # so this entry has been deleted. The level is pinned so that a tombstone at
      # the very start of a level cannot pull the scan into the neighbouring level.
      find_prev_table_key(manifest, epoch, level, start_key, key, false)

    {{^level, _k, _ver, ep} = key, _obj} when ep > epoch ->
      # This entry is not visible at `epoch`
      find_prev_table_key(manifest, epoch, level, start_key, key, false)

    {{^level, _k, _ver, _ep} = key, [{_key, :tombstone}]} ->
      # This entry is a tombstone, so we will ignore the next entry we see
      # (which is previous in the keyspace)
      find_prev_table_key(manifest, epoch, level, start_key, key, true)

    {{^level, _k, _ver, _ep} = key, [{_key, {ek_key, ek_ver, _i, _ck, _id}}]}
    when {ek_key, ek_ver} > start_key ->
      # This table actually *contains* `start_key`:
      #   - table_sk <= start_key due to prev()
      #   - table_ek > start_key due to the guard
      # so keep walking backwards to get below it.
      find_prev_table_key(manifest, epoch, level, start_key, key, false)

    {{^level, _k, _ver, _ep} = key, _obj} ->
      # This is the first table found which has an ek <= `start_key`,
      # which means it is the first table with a range *below* (disjoint) start_key
      {:ok, key}

    _ ->
      # Start of the ETS table, or the scan left `level`
      :error
  end
end
# Walks the manifest forwards, starting at the entry that follows `prev_key`,
# and collects every table of `level` that is visible at `epoch` and whose start
# key is strictly below `end_key`.
#
# Tables are prepended to `acc`, so the returned list is in reverse scan order.
# The walk stops at the end of the ETS table, at the first key of another level,
# or at the first visible table starting at or beyond `end_key`.
defp scan_tables(manifest, epoch, level, end_key, prev_key, acc) do
  case :ets.next_lookup(manifest, prev_key) do
    {{^level, _k, _ver, ep} = key, [{_key, _value}]} when ep > epoch ->
      # This entry is not visible at epoch
      scan_tables(manifest, epoch, level, end_key, key, acc)

    {{^level, _k, _ver, _ep} = key, [{_key, :tombstone}]} ->
      # This entry is a tombstone and cancels out the previous entry.
      # The match is assertive on purpose: a visible tombstone with nothing
      # accumulated before it raises a MatchError.
      # TODO: assert that it was the same key?
      [_deleted | remaining] = acc
      scan_tables(manifest, epoch, level, end_key, key, remaining)

    {{^level, sk_key, sk_ver, _ep} = key, [{_key, value}]} when {sk_key, sk_ver} < end_key ->
      # This entry is visible and overlaps, accumulate
      {ek_key, ek_ver, block_index, block_checksum, _id} = value
      table = {:table, block_index, block_checksum, {sk_key, sk_ver}, {ek_key, ek_ver}}

      scan_tables(manifest, epoch, level, end_key, key, [table | acc])

    _ ->
      # Reached the end of either the ets table, the level, or the range (end_key)
      acc
  end
end
149149+71150 @doc false
72151 def dump(manifest) do
73152 :ets.tab2list(manifest)
+9-6
lib/xks/merge.ex
···1414 {_k, _ver} = key -> :gb_trees.insert(key, it, tree)
1515 end
1616 end)
1717+ |> then(&coerce_empty/1)
1718 end
18191920 @spec next(merge_state) :: {term, merge_state | :empty}
···2829 {_k, _ver} = key -> :gb_trees.insert(key, iterator, tree)
2930 end
30313131- tree =
3232- case :gb_trees.is_empty(tree) do
3333- true -> :empty
3434- false -> tree
3535- end
3636-3232+ tree = coerce_empty(tree)
3733 {pair, tree}
3434+ end
# Normalises a merge state: a `:gb_trees` tree with no entries becomes the
# `:empty` marker, any other tree is returned unchanged.
defp coerce_empty(state) do
  if :gb_trees.is_empty(state), do: :empty, else: state
end
39424043 @spec iterator_for_memtable(Memtable.t, binary) :: memtable_iterator
+8
lib/xks/xks.ex
···60606161 @spec get(t, non_neg_integer, binary) :: binary | nil
6262 def get(%XKS{} = xks, version, key) do
6363+ manifest = xks.manifest
6364 epoch = :atomics.get(xks.epoch_atomic, 1)
6565+6666+ _tables = Manifest.list_overlapping_tables(manifest, epoch, 1, {key, version}, {key, version + 1})
64676568 Manifest.list_memtables(xks.manifest, epoch)
6669 |> Enum.find_value(:error, fn {_id, memtable} ->
···78817982 @spec scan(t, non_neg_integer, binary, binary) :: [{binary, binary}]
8083 def scan(%XKS{} = xks, version, start_key, end_key) do
8484+ manifest = xks.manifest
8185 epoch = :atomics.get(xks.epoch_atomic, 1)
8686+8787+ # start_key has version=0 because we have no way of knowing its latest version in the DB
8888+ # end_key has version=0 because the end key is exclusive so we won't actually read it
8989+ _tables = Manifest.list_overlapping_tables(manifest, epoch, 1, {start_key, 0}, {end_key, 0})
82908391 iterators =
8492 Manifest.list_memtables(xks.manifest, epoch)