···8181 @tick_shard_moves_interval_ms 1000
82828383 @shard_max_size_bytes 50_000_000
8484- #@shard_max_size_bytes 20_000_000
8484+ #@shard_max_size_bytes 10_000_000
85858686 def start_link(arg), do: SimServer.start_link(__MODULE__, arg)
8787···246246 # i.e. team must be overfilled by at least N shards worth of bytes
247247 # (this also helpfully prevents rebalancing a nearly empty cluster)
248248 @overfilled_threshold_bytes (@shard_max_size_bytes * 3)
249249+ # Only move shards from overfilled teams to teams with this many fewer bytes
250250+ # (the emptiest available team will still be picked)
251251+ @move_to_below_difference_bytes (@shard_max_size_bytes * 2)
249252 # Limit shard move concurrency per-team
250253 # Prevents overloading and improves distribution since we don't currently take in-flight shard bytes into account
251254 @max_team_outgoing_shards 2
252255 @max_team_incoming_shards 1
253256254257 defp chop_overfilled_teams(%State{} = state, team_stats, avg_bytes_per_team) when is_list(team_stats) and is_integer(avg_bytes_per_team) do
255255- overfilled_teams =
256256- Enum.filter(team_stats, fn {_team, %{used_bytes: used_bytes}} ->
257257- used_bytes > (avg_bytes_per_team + @overfilled_threshold_bytes)
258258- end)
259259-260260- Enum.reduce(overfilled_teams, state, fn {%StorageTeam{} = team, _stats}, %State{} = state ->
261261- movable_shards = movable_shards_for_team(state, team)
258258+ team_stats
259259+ |> Enum.filter(fn {_team, %{used_bytes: used_bytes}} ->
260260+ used_bytes > (avg_bytes_per_team + @overfilled_threshold_bytes)
261261+ end)
262262+ # Prioritize the most-filled teams
263263+ |> Enum.sort_by(fn {_team, %{used_bytes: used_bytes}} -> used_bytes end, :desc)
264264+ |> Enum.reduce(state, fn {%StorageTeam{} = team, stats}, %State{} = state ->
265265+ below_threshold = stats.used_bytes - @move_to_below_difference_bytes
262266263267 # Move shards off of this overfilled team until we hit max outgoing shards or we run out of valid target teams
264264- Enum.reduce_while(movable_shards, state, fn %Shard{} = shard, %State{} = state ->
268268+ movable_shards_for_team(state, team)
269269+ |> Enum.shuffle()
270270+ |> Enum.reduce_while(state, fn %Shard{} = shard, %State{} = state ->
265271 with true <- (count_outgoing_shards_for_team(state, team) < @max_team_outgoing_shards),
266266- {:ok, %StorageTeam{} = target_team} <- random_underfilled_team(state, team_stats, avg_bytes_per_team)
272272+ {:ok, %StorageTeam{} = target_team} <- best_team_below(state, team_stats, below_threshold)
267273 do
274274+ # Should be impossible due to threshold
275275+ assert target_team != team
276276+268277 {_result, %State{} = state} = begin_shard_move(state, shard.start_key, target_team.storage_ids)
269278 {:cont, state}
270279 else
···274283 end)
275284 end
276285277277- defp random_underfilled_team(%State{} = state, team_stats, avg_bytes_per_team) when is_list(team_stats) and is_integer(avg_bytes_per_team) do
286286+ defp best_team_below(%State{} = state, team_stats, threshold_bytes) when is_list(team_stats) and is_integer(threshold_bytes) do
278287 team_stats
279288 |> Enum.filter(fn {team, %{used_bytes: used_bytes}} ->
280280- (used_bytes < (avg_bytes_per_team / 2)) and (count_incoming_shards_for_team(state, team) < @max_team_incoming_shards)
289289+ (used_bytes < threshold_bytes) and (count_incoming_shards_for_team(state, team) < @max_team_incoming_shards)
281290 end)
291291+ # This prevents a bias when multiple teams have the same used_bytes
292292+ # (in practice this will only ever happen when there are multiple completely empty teams)
293293+ |> Enum.shuffle()
282294 |> case do
283295 [] ->
284296 :error
285297 teams ->
286286- {%StorageTeam{} = team, _stats} = Enum.random(teams)
298298+ {%StorageTeam{} = team, _stats} = Enum.min_by(teams, fn {_team, %{used_bytes: used_bytes}} -> used_bytes end)
287299 {:ok, team}
288300 end
289301 end