this repo has no description
2
fork

Configure Feed

Select the types of activity you want to include in your feed.

Refactor shard splits to use a single write-only transaction

garrison 8a0eff31 06790185

+57 -27
+4
lib/servers/commit_buffer.ex
··· 208 208 read_version_floor = commit_version - mvcc_window() 209 209 {transactions_reversed, old_transactions} = 210 210 Enum.split_with(transactions_reversed, fn %CommitTxn{} = txn -> 211 + # TODO: maybe validate instead of asserting? 212 + assert is_integer(txn.read_version) or txn.read_version == :write_only 213 + 211 214 # TODO: make sure this is not off by one anywhere (e.g. WRT resolvers, storage) 215 + # Note: (:write_only > integer, so :write_only transactions always pass) 212 216 txn.read_version > read_version_floor 213 217 end) 214 218
+29 -18
lib/servers/distributor.ex
··· 179 179 shards = ShardInfoMap.list_shards(state.shard_map) 180 180 181 181 {shards_to_split, _shards} = Enum.split_with(shards, &should_split_shard?/1) 182 - 183 - Enum.each(shards_to_split, fn shard -> 184 - split_shard(state, shard) 185 - end) 182 + :ok = split_shards(state, shards_to_split) 186 183 187 184 state 188 185 end ··· 191 188 defp should_split_shard?(%Shard{to_server_ids: to}) when to != [], do: false 192 189 defp should_split_shard?(%Shard{} = shard), do: shard.stats.size_bytes > @shard_max_size_bytes 193 190 194 - defp split_shard(%State{} = state, %Shard{} = shard) do 191 + defp split_shards(%State{} = state, shards) do 192 + # If too many shards are split at once the transaction could be too large 193 + # This seems very unlikely, but the failure would be metastable and block all splits, 194 + # so we have a limit to be safe 195 + # TODO: buggify this to a lower number for testing? 196 + shards = Enum.take(shards, 100) 197 + ks_pairs = Enum.map(shards, &do_split_shard(state, &1)) 198 + 199 + {:ok, txn} = Transaction.new(state.cluster, write_only: true) 200 + txn = Transaction.write(txn, ks_pairs) 201 + 202 + case Transaction.commit(txn) do 203 + {:ok, _txn} -> :ok 204 + # Note that retryable errors (:transaction_too_old, :read_conflict) are impossible 205 + # for this transaction as it is write-only 206 + # 207 + # If we get a non-retryable error (e.g. :timeout) then we must exit (triggering a recovery) 208 + # to preserve the consistency of the in-memory shard map 209 + # (In practice, if commits are timing out a recovery is likely going to happen anyway) 210 + {:error, _other} -> exit(:shutdown) 211 + end 212 + end 213 + 214 + defp do_split_shard(%State{} = state, %Shard{} = shard) do 195 215 assert shard.to_server_ids == [] 196 216 %ShardStats{midpoint_key: midpoint} = shard.stats 197 217 ··· 204 224 } 205 225 shard = %{shard | end_key: midpoint, stats: nil} 206 226 207 - # TODO: open a transaction with no read version and only handle commit error 208 - with {:ok, txn} <- Transaction.new(state.cluster), 209 - txn = Transaction.write(txn, [to_ks_pair(new_shard)]), 210 - {:ok, _txn} <- Transaction.commit(txn) 211 - do 212 - ShardInfoMap.put(state.shard_map, shard.start_key, shard) 213 - ShardInfoMap.put(state.shard_map, new_shard.start_key, new_shard) 214 - :ok 215 - else 216 - # TODO: we only need to exit for errors which are not retryable (e.g. commit timed out) 217 - # This is necessary to maintain consistency of the in-memory shard map 218 - _ -> exit(:shutdown) 219 - end 227 + ShardInfoMap.put(state.shard_map, shard.start_key, shard) 228 + ShardInfoMap.put(state.shard_map, new_shard.start_key, new_shard) 229 + 230 + to_ks_pair(new_shard) 220 231 end 221 232 222 233 defp tick_shard_moves(%State{} = state) when state.cluster.status != :normal do
+24 -9
lib/transaction.ex
··· 24 24 25 25 @spec new(%Cluster{}) :: {:ok, TxnState.t} | {:error, :timeout} 26 26 def new(%Cluster{} = cluster, opts \\ []) do 27 - if read_version = opts[:read_version] do 28 - {:ok, %TxnState{cluster: cluster, read_version: read_version}} 29 - else 30 - case get_read_version(cluster) do 31 - {:ok, read_version} -> {:ok, %TxnState{cluster: cluster, read_version: read_version}} 32 - {:error, _err} = error -> error 33 - end 27 + cond do 28 + Keyword.get(opts, :write_only) -> 29 + {:ok, %TxnState{cluster: cluster, read_version: :write_only}} 30 + 31 + read_version = Keyword.get(opts, :read_version) -> 32 + {:ok, %TxnState{cluster: cluster, read_version: read_version}} 33 + 34 + true -> 35 + case get_read_version(cluster) do 36 + {:ok, read_version} -> {:ok, %TxnState{cluster: cluster, read_version: read_version}} 37 + {:error, _err} = error -> error 38 + end 34 39 end 35 40 end 36 41 ··· 66 71 end 67 72 68 73 def read(%TxnState{} = txn, keys) when is_list(keys) do 74 + ensure_can_read!(txn) 75 + 69 76 get_shards = fn -> 70 77 buf = random_commit_buffer(txn.cluster) 71 78 CommitBuffer.get_shards_multi(buf, keys) ··· 148 155 @spec read_range(%TxnState{}, binary, binary) :: {:ok, {[{binary, binary}], %TxnState{}}} | {:error, :read_version_too_old} 149 156 def read_range(%TxnState{} = txn, start_key, end_key) 150 157 when is_binary(start_key) and is_binary(end_key) and start_key >= "" and end_key <= "\xFF\xFF" do 158 + ensure_can_read!(txn) 159 + 151 160 get_ranges = fn -> 152 161 buf = random_commit_buffer(txn.cluster) 153 162 case CommitBuffer.get_shards_multi(buf, [{start_key, end_key}]) do ··· 247 256 %TxnState{txn | write_conflicts: [range | txn.write_conflicts]} 248 257 end 249 258 250 - @spec commit(TxnState.t) :: {:ok, %TxnState{}} | {:error, term} 259 + @spec commit(TxnState.t) :: {:ok, %TxnState{}} | {:error, :transaction_too_old | :read_conflict | :database_locked | :timeout} 251 260 def commit(%TxnState{} = txn) do 252 261 commit_txn = %CommitTxn{ 253 262 read_version: txn.read_version, ··· 263 272 {:ok, %{commit_version: commit_version}} -> 264 273 {:ok, %{txn | commit_version: commit_version}} 265 274 266 - {:error, _error} = error -> error 275 + {:error, _err} = error -> error 267 276 end 268 277 end 269 278 ··· 278 287 i = SimServer.deterministic_random(0..(length(commit_buffers) - 1)) 279 288 Enum.at(commit_buffers, i).pid 280 289 end 290 + 291 + defp ensure_can_read!(%TxnState{read_version: :write_only}) do 292 + raise "Cannot perform reads because transaction is write-only!" 293 + end 294 + 295 + defp ensure_can_read!(_txn), do: :noop 281 296 end