···5353 case :ets.next_lookup(wal, prev_key) do
5454 {{:entry_buffer, i} = key, [{_key, {partition, version, bin}}]} when version <= max_persist_version ->
5555 :ets.delete(wal, key)
5656- write_wal_entry(xks, i, partition, version, bin)
5656+ :ok = write_wal_entry(xks, i, partition, version, bin)
5757+5758 do_flush_entry_buffer(xks, wal, max_persist_version, key)
5959+5860 _ ->
5961 :ok
6062 end
···8284 # otherwise it would not appear in the slots and would never be reclaimed
8385 assert pos > 0
84868585- # The current extent may already be blow wal_gc_version but had not been released
8787+ # The current extent may already be below wal_gc_version but had not been released
8688 # because it was still current
8789 # In that case it should be released immediately
8890 # TODO: this is a rare case that requires the extent to be full before a GC, probe and test
···118120 checksum::binary-16,
119121 >>
120122121121- :ets.insert(wal, {{:buffer, i}, {version, slot}})
123123+ :ets.insert(wal, {{:slot_buffer, i}, {version, slot}})
122124 :ok
123125 end
124126···205207 # Load WAL slot blocks into a list so we have them in the proper order
206208 # (the linked list starts from the tail, but the WAL must be loaded the other way)
207209 {tail_index, tail_checksum} = wal_tail_address
208208- wal_blocks = do_load_wal_blocks(block_store, wal_block_count, tail_index, tail_checksum, [], 0)
210210+ wal_slot_blocks = do_load_wal_slot_blocks(block_store, wal_block_count, tail_index, tail_checksum, [], 0)
209211210212 # Replay the WAL
211211- wal_lv = do_load_wal(wal_blocks, xks, partition_mlbs, 0)
213213+ wal_lv = do_replay_wal_entries(wal_slot_blocks, xks, partition_mlbs, 0)
212214 assert wal_lv >= 0
213215214216 # Compute largest persisted version of any table
···231233 recovered_version
232234 end
# Terminating clause of the WAL slot block loader: once `count_acc` has reached
# `wal_block_count`, the remaining address arguments are ignored and the
# accumulated `blocks_acc` list is returned as-is.
# The removed ("-") line stopped on `count_acc >= wal_block_count`; the added ("+")
# line only stops on an exact match.
# NOTE(review): the recursive clause prepends each block while walking back from the
# tail, so the returned list is presumably in head-to-tail order — confirm.
233235234234- defp do_load_wal_blocks(_block_store, wal_block_count, _block_index, _block_checksum, blocks_acc, count_acc) when count_acc >= wal_block_count do
236236+ defp do_load_wal_slot_blocks(_block_store, wal_block_count, _block_index, _block_checksum, blocks_acc, count_acc) when count_acc == wal_block_count do
235237 blocks_acc
236238 end
237239238238- defp do_load_wal_blocks(block_store, wal_block_count, block_index, block_checksum, blocks_acc, count_acc) do
240240+ defp do_load_wal_slot_blocks(block_store, wal_block_count, block_index, block_checksum, blocks_acc, count_acc) do
241241+ # The head of a new log will point to the null address but we should never reach it here due to `wal_block_count`
239242 assert block_index != 0
243243+ assert count_acc < wal_block_count
244244+240245 block_data = Blocks.read(block_store, block_index, block_checksum)
241246242247 <<
···247252248253 blocks_acc = [{block_index, block_data} | blocks_acc]
249254 count_acc = count_acc + 1
250250- do_load_wal_blocks(block_store, wal_block_count, prev_index, prev_checksum, blocks_acc, count_acc)
255255+ do_load_wal_slot_blocks(block_store, wal_block_count, prev_index, prev_checksum, blocks_acc, count_acc)
251256 end
# Terminating clause of the WAL replay: with no blocks left in the list, return the
# accumulated version `lv_acc` (the recursive clause replaces it with each block's
# `block_lv` in turn; the caller passes 0 as the initial value).
# NOTE(review): "lv" presumably stands for "largest version" — confirm.
252257253253- defp do_load_wal([], _xks, _partition_mlbs, lv_acc) do
258258+ defp do_replay_wal_entries([], _xks, _partition_mlbs, lv_acc) do
254259 lv_acc
255260 end
256261257257- defp do_load_wal([{block_index, block_data} | blocks_rest], xks, partition_mlbs, lv_acc) do
262262+ defp do_replay_wal_entries([{block_index, block_data} | blocks_rest], xks, partition_mlbs, lv_acc) do
258263 %{
259264 block_store: block_store,
260265 wal: wal,
···276281 _padding_and_entries_size::binary,
277282 >> = block_data
278283279279- block_lv = do_load_entries(entries_data, xks, block_store, wal, wal_gc_version, partition_mlbs, 0)
284284+ block_lv = do_replay_entries_from_block(entries_data, xks, block_store, wal, wal_gc_version, partition_mlbs, 0)
280285 :ets.insert(wal, {{:wal_block, block_index}, block_lv})
281286282287 # Version in the log should always be monotonically increasing
283288 assert block_lv >= lv_acc
284289 lv_acc = block_lv
285290286286- do_load_wal(blocks_rest, xks, partition_mlbs, lv_acc)
291291+ do_replay_wal_entries(blocks_rest, xks, partition_mlbs, lv_acc)
287292 end
# Terminating clause of the per-block entry replay: when the remaining entries
# binary is empty (""), every slot in the block has been consumed and the
# accumulated version `lv_acc` is returned unchanged.
288293289289- defp do_load_entries("", _xks, _block_store, _wal, _wal_gc_version, _partition_mlbs, lv_acc) do
294294+ defp do_replay_entries_from_block("", _xks, _block_store, _wal, _wal_gc_version, _partition_mlbs, lv_acc) do
290295 lv_acc
291296 end
292297293293- defp do_load_entries(block_data, xks, block_store, wal, wal_gc_version, partition_mlbs, lv_acc) do
298298+ defp do_replay_entries_from_block(block_data, xks, block_store, wal, wal_gc_version, partition_mlbs, lv_acc) do
294299 # See constant: c_wal_slot_bytes()
295300 <<
296301 partition::signed-integer-64,
···300305 bin_size::integer-64,
301306 checksum::binary-16,
302307303303- rest::binary,
308308+ block_rest::binary,
304309 >> = block_data
305310306311 # Slots with (version < wal_gc_version) have already been garbage-collected
···335340 assert version >= lv_acc
336341 lv_acc = version
337342338338- do_load_entries(rest, xks, block_store, wal, wal_gc_version, partition_mlbs, lv_acc)
343343+ do_replay_entries_from_block(block_rest, xks, block_store, wal, wal_gc_version, partition_mlbs, lv_acc)
339344 end
# Flushes buffered WAL state for `xks` and, per the @spec, returns an XKS struct.
#
# Steps:
#   1. `flush_entry_buffer/1` is run first and must return `:ok` (the match crashes
#      otherwise).
#   2. `xks.wal` is probed for the first key following `{:slot_buffer, -1}`. If that
#      key has the shape `{:slot_buffer, i}`, the result of `do_flush(xks)` is
#      returned; for any other result `xks` is returned unchanged.
#
# NOTE(review): probing `:ets.next/2` with a key that is not stored in the table
# relies on `xks.wal` being an `ordered_set` ETS table — confirm where the table
# is created.
340345341346 @spec flush(XKS.t) :: XKS.t
342347 def flush(%XKS{} = xks) do
348348+ # Ensure all entries <= `max_persist_version` are written
349349+ # Note: flushing an `:entry_buffer` entry will write a `:slot_buffer` entry,
350350+ # so this must be done before the skip optimization below
343351 :ok = flush_entry_buffer(xks)
344352345345- case :ets.next(xks.wal, {:buffer, -1}) do
346346- {:buffer, _i} -> do_flush(xks)
353353+ # Skip the flush if there have been no writes since last time
354354+ # (this avoids copying the tail for no reason)
355355+ case :ets.next(xks.wal, {:slot_buffer, -1}) do
356356+ {:slot_buffer, _i} -> do_flush(xks)
347357 _ -> xks
348358 end
349359 end
···359369 },
360370 } = xks
361371372372+ # Copy-on-write the tail block if it has space, otherwise create a new
373373+ # block pointing to the previous tail
362374 block_acc =
363375 case xks.wal_tail_address do
364376 c_null_address() ->
···396408 end
397409 end
398410399399- prev_key = {:buffer, -1}
411411+ prev_key = {:slot_buffer, -1}
400412 prev_version = 0
401413 wal_tail_address = do_flush_slots(block_store, free_list, wal, opt_block_size, prev_key, prev_version, block_acc)
402414···411423412424 defp do_flush_slots(block_store, free_list, wal, opt_block_size, prev_key, prev_version, block_acc) do
413425 case :ets.next_lookup(wal, prev_key) do
414414- {{:buffer, _i} = key, [{_key, entry}]} ->
426426+ {{:slot_buffer, _i} = key, [{_key, entry}]} ->
415427 :ets.delete(wal, key)
416428 {version, slot} = entry
417429···419431 case (byte_size(block_acc) + byte_size(slot)) > opt_block_size do
420432 true ->
421433 # Pad and rotate block
422422- {block_index, block_checksum} = write_wal_block(block_store, wal, free_list, opt_block_size, block_acc, prev_version)
434434+ {block_index, block_checksum} = write_slots_block(block_store, wal, free_list, opt_block_size, block_acc, prev_version)
423435424436 # Create a new block acc pointing to the previous (written) block
425437 # See constant: c_address_bytes()
···438450 do_flush_slots(block_store, free_list, wal, opt_block_size, prev_key, version, block_acc)
439451440452 _ ->
441441- _address = write_wal_block(block_store, wal, free_list, opt_block_size, block_acc, prev_version)
453453+ _address = write_slots_block(block_store, wal, free_list, opt_block_size, block_acc, prev_version)
442454 end
443455 end
444456445445- defp write_wal_block(block_store, wal, free_list, opt_block_size, block_acc, largest_version) do
457457+ defp write_slots_block(block_store, wal, free_list, opt_block_size, block_acc, largest_version) do
446458 entries_size = byte_size(block_acc) - c_address_bytes()
447459 pad_bytes = opt_block_size - c_address_bytes() - entries_size - c_wal_entries_size_bytes()
448460 assert pad_bytes >= 0