···11defmodule Hobbes do
22- alias Hobbes.Servers.{Coordinator, ServerSupervisor}
22+ alias Hobbes.Servers.{Coordinator, ServerSupervisor, Manager}
33+ alias Hobbes.Structs.Cluster
3445 defp default_opts do
56 [
···5758 :ok,
5859 coordinator_pids,
5960 }
6161+ end
@doc """
Returns the most recent `Cluster` known to the manager with the highest generation.

Every coordinator in `cluster.coordinators` is asked for its manager. Replies
of `{:ok, {manager_pid, generation}}` are collected; coordinators that answer
`{:error, :not_primary}` or that do not answer in time (`{:error, :timeout}`)
are ignored. The manager with the highest generation is then asked for the
cluster via `Manager.get_cluster/1`.

The `cluster` passed in is returned unchanged when no coordinator reports a
manager, or when `Manager.get_cluster/1` returns an error.
"""
@spec refresh_cluster(Cluster.t) :: Cluster.t
def refresh_cluster(%Cluster{} = cluster) do
  cluster.coordinators
  # Enum.map is eager, so every request is sent before the first receive starts.
  |> Enum.map(&Coordinator.get_manager_send/1)
  |> Enum.map(&Coordinator.get_manager_receive/1)
  |> Enum.reduce([], fn
    {:ok, result}, acc -> [result | acc]
    {:error, :not_primary}, acc -> acc
    # A coordinator that did not reply in time counts as "no response";
    # without this clause a single timeout raised FunctionClauseError.
    {:error, :timeout}, acc -> acc
  end)
  |> case do
    [_ | _] = results ->
      {manager_pid, _generation} = Enum.max_by(results, fn {_pid, generation} -> generation end)

      case Manager.get_cluster(manager_pid) do
        {:ok, %Cluster{} = new_cluster} -> new_cluster
        {:error, _err} -> cluster
      end

    # Received no usable responses
    [] ->
      cluster
  end
end
6184end
+5-1
lib/servers/commit_buffer.ex
···283283 TLog.write_batch_send(tlog_pid, log_batch)
284284 end)
285285 |> Enum.each(fn req_id ->
286286- :ok = TLog.write_batch_receive(req_id)
286286+ case TLog.write_batch_receive(req_id) do
287287+ :ok -> :noop
288288+ # If a commit fails, we trigger recovery (or are already recovering)
289289+ {:error, :timeout} -> exit(:shutdown)
290290+ end
287291 end)
288292289293 # Once all tlogs have replied (made durable), notify sequencer this version is committed
+22-2
lib/servers/coordinator.ex
···145145 SimServer.call(server, :get_manager)
146146 end
@doc """
Issues a `:get_manager` request to the coordinator `server` through
`SimServer.send_request/2` and returns the resulting request id.

The request id is the value expected by `get_manager_receive/1`.
"""
@spec get_manager_send(term) :: SimServer.request_id
def get_manager_send(server), do: SimServer.send_request(server, :get_manager)
@doc """
Collects the response for a request id obtained from `get_manager_send/1`.

Calls `SimServer.receive_response/2` with a timeout argument of `1000`.
A `{:reply, reply}` result is unwrapped and `reply` is returned as-is;
a `:timeout` result is translated to `{:error, :timeout}`.
"""
@spec get_manager_receive(SimServer.request_id) :: {:ok, {pid, non_neg_integer}} | {:error, :not_primary | :timeout}
def get_manager_receive(req_id) do
  req_id
  |> SimServer.receive_response(1000)
  |> case do
    :timeout -> {:error, :timeout}
    {:reply, response} -> response
  end
end
@doc """
Sends `{:request, :inc_generation}` to `server` with `SimServer.call/2`
and returns whatever that call returns.
"""
def inc_generation(server), do: SimServer.call(server, {:request, :inc_generation})
···298311 end
# Handles `:get_manager`.
#
# Replies `{:error, :not_primary}` when this coordinator has no manager pid
# recorded, otherwise `{:ok, {manager_pid, manager_generation}}`. The state is
# returned unchanged in both cases.
def handle_call(:get_manager, _from, %State{manager_pid: manager_pid} = state) do
  reply =
    case manager_pid do
      pid when is_pid(pid) ->
        # A recorded manager pid must come with an integer generation.
        assert is_integer(state.manager_generation)
        {:ok, {pid, state.manager_generation}}

      nil ->
        {:error, :not_primary}
    end

  {:reply, reply, state}
end
303322304323 def handle_call({:track_manager_generation, pid, generation}, _from, %State{} = state) when is_pid(pid) and is_integer(generation) do
···9861005 assert state.manager_pid == nil
9871006 assert state.manager_generation == nil
9881007989989- {:ok, manager_pid} = Manager.start_link(%{primary_coordinator: self()})
10081008+ coordinators = Enum.map(state.replica_ids, &Map.fetch!(state.replica_map, &1))
10091009+ {:ok, manager_pid} = Manager.start_link(%{coordinators: coordinators, primary_coordinator: self()})
9901010 %{state | manager_pid: manager_pid}
9911011 end
9921012end