···210210 :ok
211211 end
# Handles a TLog ping received while the cluster status is :normal.
#
# TLogs from older generations keep being registered as they come back, even
# though recovery has already completed. Per the original note, the cluster
# could otherwise get stuck because Storage servers might never learn the
# pids of those TLogs.
#
# Returns the (possibly updated) state.
defp on_tlog_ping(%State{} = state, %TLogStatus{} = tlog_status)
     when state.cluster.status == :normal do
  add_tlog_to_cluster(state, tlog_status)
end
216219···218221 assert state.cluster.status == :recovering
219222 assert length(state.cluster.tlog_generations) > 0
220223224224+ state = add_tlog_to_cluster(state, status)
225225+221226 %TLogGeneration{} = latest_generation = hd(state.cluster.tlog_generations)
222227223228 case status.id in latest_generation.tlog_ids do
···230235 true ->
231236 case Enum.any?(state.recovered_tlogs, &(&1.id == status.id)) do
232237 false ->
233233- server = %Server{type: Hobbes.Servers.TLog, id: status.id, pid: status.pid}
234234- state = put_in(state.cluster.servers[server.id], server)
235238 %{state | recovered_tlogs: [status | state.recovered_tlogs]}
236239 |> maybe_start_generation()
237240···243246244247 # Old TLog, not needed for recovery
245248 false -> state
249249+ end
250250+ end
# Registers the TLog described by `status` in `state.cluster.servers`, keyed by
# its id, unless an entry with that id is already present.
#
# Returns the state unchanged when the id is already known.
# NOTE(review): an existing entry is never refreshed, so a TLog reporting a
# different pid under the same id would keep its old record here — confirm
# whether that can happen.
defp add_tlog_to_cluster(%State{} = state, %TLogStatus{} = status) do
  if Map.has_key?(state.cluster.servers, status.id) do
    state
  else
    tlog_server = %Server{type: Hobbes.Servers.TLog, id: status.id, pid: status.pid}
    put_in(state.cluster.servers[tlog_server.id], tlog_server)
  end
end
248261