···115115 {table, next_sub, state}
116116 end
117117118118+ # The trailer block is the last block of the table and consists of three sections:
119119+ #
120120+ # | metadata | block_addresses | index_slots |
121121+ #
122122+ # The table metadata is the fixed-length metadata needed to decode the table.
123123+ # It consists of:
124124+ #
125125+ # | block_count (4 bytes) | TODO (4 bytes) | index_slot_count (4 bytes) |
126126+ #
127127+ # Where `block_count` is the number of blocks in the table (sans the trailer block)
128128+ # which is needed to decode the block addresses,
129129+ # and `index_slot_count` is the number of subtables in the table
130130+ # which is needed to decode the index slot array.
118131 defp encode_trailer_block(block_addresses, index_slots_data, index_slot_count) do
119132 {addresses_data, addresses_size} = encode_block_addresses(block_addresses, [], 0)
120133···132145 ]
133146 end
134147148148+ # Block addresses are encoded as:
149149+ #
150150+ # | block_index (8 bytes) | block_checksum (16 bytes) |
151151+ #
152152+ # Where `block_index` and `block_checksum` are the index/checksum of the block.
135153 defp encode_block_addresses([], acc, size_acc) do
136154 {
137155 Enum.reverse(acc),
···161179 Enum.reverse(block_data_reversed, [pad_bytes])
162180 end
163181182182+ # Index blocks consist of a number of variable-length index entries.
183183+ # Entries are aligned to the blocks (i.e. an entry cannot span blocks).
184184+ #
185185+ # Each index entry is encoded as:
186186+ #
187187+ # | key (variable) | version (8 bytes) | block_i (1 byte) | offset (3 bytes) |
188188+ #
189189+ # Where `key` and `version` are the key/version of the first pair in the subtable,
190190+ # `block_i` is the index of the block *within the table* (i.e. starting from 0) that contains the subtable,
191191+ # and `offset` is the byte offset of the subtable within that block.
192192+ #
193193+ # Because index entries are variable-length, an index slot array is also created and stored in the last block of the table.
194194+ # Each entry in the slot array consists of:
195195+ #
196196+ # | index_block_i (2 bytes) | offset (3 bytes) | key_size (2 bytes) |
197197+ #
198198+ # Where `index_block_i` is the index of the block *within the table* containing the entry,
199199+ # `offset` is the byte offset of the entry within that block,
200200+ # and `key_size` is the size of the entry's key in bytes.
164201 defp do_encode_index(_i, [], [], cur_acc, blocks_acc, size_acc, slots_acc, count) do
165202 {
166203 [{cur_acc, size_acc} | blocks_acc],
···192229 cur_acc = [<<version::integer-64, block_i::integer-8, offset::integer-24>>, key | cur_acc]
193230194231 # length() is not efficient but this will rarely be >0 in practice
232232+ # TODO: use the index of the block within the table instead of starting from 0
195233 index_block_i = length(blocks_acc)
196234197235 # [index_block, slot_offset, key_size]