Improve recovery delay and bootstrap behavior · corporate.fm/hobbes@882a446

+4

ROADMAP.md

··· 27 27 - [ ] Move undersized shards to their siblings for merging 28 28 - [ ] Merge shards 29 29 30 + ### Housekeeping 31 + 32 + - [ ] Store replication factor per TLogGeneration and remove all hard-coded references 33 + 30 34 ### Testing 31 35 32 36 - [ ] Fault injection

+56 -14

lib/servers/manager.ex

··· 22 22 cluster: Cluster.t, 23 23 server_monitors: %{reference => non_neg_integer}, 24 24 next_id: non_neg_integer, 25 - recovery_started_timestamp: integer | nil, 25 + recovery_ready_timestamp: integer | nil, 26 26 } 27 27 28 28 @enforce_keys [:coordinators, :primary_coordinator] ··· 34 34 cluster: nil, 35 35 server_monitors: %{}, 36 36 next_id: 0, 37 - recovery_started_timestamp: nil, 37 + recovery_ready_timestamp: nil, 38 38 ] ++ @enforce_keys 39 39 end 40 40 ··· 155 155 generation: generation, 156 156 config: config, 157 157 cluster: cluster, 158 - recovery_started_timestamp: SimServer.current_time(), 159 158 } 160 159 :ok = Coordinator.track_manager_generation(state.primary_coordinator, generation) 161 160 ··· 222 221 # Do nothing if we have already recovered 223 222 defp maybe_start_generation(%State{} = state) when state.cluster.status == :normal, do: state 224 223 225 - @min_recovery_duration_us 2_000_000 226 - 227 224 defp maybe_start_generation(%State{} = state) do 228 225 assert state.cluster.status == :recovering 229 226 ··· 248 245 case has_enough_slots? do 249 246 true -> 250 247 case state.cluster.tlog_generations do 251 - [] -> bootstrap(state) 248 + [] -> maybe_bootstrap(state) 252 249 [_ | _] -> maybe_recover(state) 253 250 end 254 251 ··· 256 253 end 257 254 end 258 255 256 + @recovery_wait_duration_us 1_000_000 257 + 258 + defp maybe_bootstrap(%State{} = state) do 259 + assert state.cluster.status == :recovering 260 + assert length(state.cluster.tlog_generations) == 0 261 + 262 + # Minimum number of zones (currently just ServerSupervisors) required 263 + # to form the seed storage team 264 + min_storage_zones = state.config.num_replicas 265 + 266 + collected_storage_zones = 267 + Enum.count(state.supervisors, fn {_pid, %{storage: storage}} -> 268 + storage > 0 269 + end) 270 + 271 + # TODO: if bootstrap fails *after* spawning the storage servers it's possible to 272 + # reach a state where the next manager is unable to get enough storage slots 273 + # to boostrap the cluster 274 + # (The simulator will probably find this when we do fault injection testing) 275 + # 276 + # Maybe we will need some mechanism to forcibly overwrite existing storage servers 277 + # during bootstrap only 278 + has_enough_storage? = collected_storage_zones >= min_storage_zones 279 + 280 + case has_enough_storage? do 281 + true -> 282 + state = %{state | recovery_ready_timestamp: state.recovery_ready_timestamp || SimServer.current_time()} 283 + elapsed_us = SimServer.current_time() - state.recovery_ready_timestamp 284 + 285 + case elapsed_us > @recovery_wait_duration_us do 286 + true -> bootstrap(state) 287 + false -> state 288 + end 289 + 290 + false -> 291 + assert state.recovery_ready_timestamp == nil 292 + state 293 + end 294 + end 295 + 259 296 defp maybe_recover(%State{} = state) do 260 297 assert state.cluster.status == :recovering 261 298 assert length(state.cluster.tlog_generations) > 0 ··· 265 302 min_prev_tlogs = length(prev_generation.tlog_ids) - 2 266 303 has_enough_prev_tlogs? = length(state.recovered_tlogs) >= min_prev_tlogs 267 304 268 - # Wait until at least 2 seconds have passed to avoid always starting with a minimal set of slots 269 - # TODO: wait until N seconds elapsed *after collecting required slots* instead 270 - elapsed_us = SimServer.current_time() - state.recovery_started_timestamp 271 - enough_elapsed? = elapsed_us > @min_recovery_duration_us 305 + case has_enough_prev_tlogs? do 306 + true -> 307 + state = %{state | recovery_ready_timestamp: state.recovery_ready_timestamp || SimServer.current_time()} 308 + elapsed_us = SimServer.current_time() - state.recovery_ready_timestamp 309 + 310 + case elapsed_us > @recovery_wait_duration_us do 311 + true -> recover(state) 312 + false -> state 313 + end 272 314 273 - case has_enough_prev_tlogs? and enough_elapsed? do 274 - true -> recover(state) 275 - false -> state 315 + false -> 316 + assert state.recovery_ready_timestamp == nil 317 + state 276 318 end 277 319 end 278 320 ··· 289 331 290 332 assert open_stateless_slots >= 5 291 333 assert open_tlog_slots >= state.config.num_replicas 292 - # TODO: this is not guaranteed, we need a maybe_bootstrap() 334 + # TODO: assert storage slots are in independent zones 293 335 assert open_storage_slots >= state.config.num_replicas 294 336 295 337 # 3 -> Sequencer, Resolver, Distributor

+1 -1

lib/workloads.ex

··· 144 144 {cluster_opts, opts} = Keyword.pop(opts, :cluster_opts, []) 145 145 146 146 {:ok, coordinator_pids} = Hobbes.start_cluster(cluster_opts) 147 - SimServer.sleep(1_000) 147 + SimServer.sleep(2_000) 148 148 {:ok, {manager_pid, _gen}} = Coordinator.get_manager(hd(coordinator_pids)) 149 149 {:ok, %Cluster{} = cluster} = Manager.get_cluster(manager_pid) 150 150

+2 -2

test/hobbes_test.exs

··· 274 274 275 275 defp setup_cluster(_context) do 276 276 {:ok, coordinator_pids} = Hobbes.start_cluster([]) 277 - SimServer.sleep(1_000) 277 + SimServer.sleep(2_000) 278 278 {:ok, {manager_pid, _gen}} = Coordinator.get_manager(hd(coordinator_pids)) 279 279 {:ok, %Cluster{} = cluster} = Manager.get_cluster(manager_pid) 280 280 ··· 297 297 298 298 {:ok, coordinators} = Hobbes.start_cluster(opts) 299 299 300 - SimServer.sleep(1_000) 300 + SimServer.sleep(2_000) 301 301 302 302 {:ok, {manager_pid, _gen}} = Coordinator.get_manager(hd(coordinators)) 303 303 {:ok, %Cluster{} = cluster} = Manager.get_cluster(manager_pid)

Configure Feed

Configure Feed