A Glicko-2 based round-robin ranking system designed to test C++ battleship submissions — battleship.dunkirk.sh
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: implement glicko-2

+185 -53
+173 -42
database.go
··· 15 15 Wins int 16 16 Losses int 17 17 WinPct float64 18 - Elo int 18 + Rating int // Glicko-2 rating 19 + RD int // Rating Deviation (uncertainty) 19 20 AvgMoves float64 20 21 Stage string 21 22 LastPlayed time.Time ··· 68 69 upload_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 69 70 status TEXT DEFAULT 'pending', 70 71 is_active BOOLEAN DEFAULT 1, 71 - elo_rating INTEGER DEFAULT 1500 72 + glicko_rating REAL DEFAULT 1500.0, 73 + glicko_rd REAL DEFAULT 350.0, 74 + glicko_volatility REAL DEFAULT 0.06 72 75 ); 73 76 74 77 CREATE TABLE IF NOT EXISTS tournaments ( ··· 136 139 query := ` 137 140 SELECT 138 141 s.username, 139 - s.elo_rating, 142 + s.glicko_rating, 143 + s.glicko_rd, 140 144 SUM(CASE WHEN m.player1_id = s.id THEN m.player1_wins WHEN m.player2_id = s.id THEN m.player2_wins ELSE 0 END) as total_wins, 141 145 SUM(CASE WHEN m.player1_id = s.id THEN m.player2_wins WHEN m.player2_id = s.id THEN m.player1_wins ELSE 0 END) as total_losses, 142 146 AVG(CASE WHEN m.player1_id = s.id THEN m.player1_moves ELSE m.player2_moves END) as avg_moves, ··· 144 148 FROM submissions s 145 149 LEFT JOIN matches m ON (m.player1_id = s.id OR m.player2_id = s.id) AND m.is_valid = 1 146 150 WHERE s.is_active = 1 147 - GROUP BY s.username, s.elo_rating 151 + GROUP BY s.username, s.glicko_rating, s.glicko_rd 148 152 HAVING COUNT(m.id) > 0 149 - ORDER BY s.elo_rating DESC, total_wins DESC 153 + ORDER BY s.glicko_rating DESC, total_wins DESC 150 154 LIMIT ? 
151 155 ` 152 156 ··· 160 164 for rows.Next() { 161 165 var e LeaderboardEntry 162 166 var lastPlayed string 163 - err := rows.Scan(&e.Username, &e.Elo, &e.Wins, &e.Losses, &e.AvgMoves, &lastPlayed) 167 + var rating, rd float64 168 + err := rows.Scan(&e.Username, &rating, &rd, &e.Wins, &e.Losses, &e.AvgMoves, &lastPlayed) 164 169 if err != nil { 165 170 return nil, err 166 171 } 172 + 173 + e.Rating = int(rating) 174 + e.RD = int(rd) 167 175 168 176 // Calculate win percentage 169 177 totalGames := e.Wins + e.Losses ··· 292 300 return submissions, rows.Err() 293 301 } 294 302 295 - func calculateEloChange(player1Rating, player2Rating, player1TotalGames, player2TotalGames int, player1Score float64) (int, int) { 296 - // K-factor: higher for fewer games (more volatile), lower for experienced players 297 - kPlayer1 := 32 298 - kPlayer2 := 32 303 + // Glicko-2 rating system implementation 304 + // Based on Mark Glickman's paper: http://www.glicko.net/glicko/glicko2.pdf 305 + 306 + const ( 307 + glickoTau = 0.5 // System constant (volatility change constraint) 308 + glickoEpsilon = 0.000001 // Convergence tolerance 309 + glicko2Scale = 173.7178 // Conversion factor: rating / 173.7178 310 + ) 311 + 312 + type Glicko2Player struct { 313 + Rating float64 // μ in Glicko-2 scale 314 + RD float64 // φ in Glicko-2 scale 315 + Volatility float64 // σ 316 + } 317 + 318 + type Glicko2Result struct { 319 + OpponentRating float64 320 + OpponentRD float64 321 + Score float64 // 0.0 to 1.0 322 + } 323 + 324 + // Convert rating from standard scale to Glicko-2 scale 325 + func toGlicko2Scale(rating, rd float64) (float64, float64) { 326 + return (rating - 1500.0) / glicko2Scale, rd / glicko2Scale 327 + } 328 + 329 + // Convert rating from Glicko-2 scale to standard scale 330 + func fromGlicko2Scale(mu, phi float64) (float64, float64) { 331 + return mu*glicko2Scale + 1500.0, phi * glicko2Scale 332 + } 333 + 334 + func g(phi float64) float64 { 335 + return 1.0 / 
math.Sqrt(1.0+3.0*phi*phi/(math.Pi*math.Pi)) 336 + } 337 + 338 + func eFunc(mu, muJ, phiJ float64) float64 { 339 + return 1.0 / (1.0 + math.Exp(-g(phiJ)*(mu-muJ))) 340 + } 341 + 342 + func updateGlicko2(player Glicko2Player, results []Glicko2Result) Glicko2Player { 343 + // Step 2: Convert to Glicko-2 scale 344 + mu, phi := toGlicko2Scale(player.Rating, player.RD) 345 + sigma := player.Volatility 299 346 300 - if player1TotalGames > 500 { 301 - kPlayer1 = 16 347 + if len(results) == 0 { 348 + // No games played - increase RD due to inactivity 349 + phiStar := math.Sqrt(phi*phi + sigma*sigma) 350 + rating, rd := fromGlicko2Scale(mu, phiStar) 351 + return Glicko2Player{Rating: rating, RD: rd, Volatility: sigma} 352 + } 353 + 354 + // Step 3: Compute v (variance) 355 + var vInv float64 356 + for _, result := range results { 357 + muJ, phiJ := toGlicko2Scale(result.OpponentRating, result.OpponentRD) 358 + gPhiJ := g(phiJ) 359 + eVal := eFunc(mu, muJ, phiJ) 360 + vInv += gPhiJ * gPhiJ * eVal * (1.0 - eVal) 361 + } 362 + v := 1.0 / vInv 363 + 364 + // Step 4: Compute delta (improvement) 365 + var delta float64 366 + for _, result := range results { 367 + muJ, phiJ := toGlicko2Scale(result.OpponentRating, result.OpponentRD) 368 + gPhiJ := g(phiJ) 369 + eVal := eFunc(mu, muJ, phiJ) 370 + delta += gPhiJ * (result.Score - eVal) 371 + } 372 + delta *= v 373 + 374 + // Step 5: Determine new volatility using Illinois algorithm 375 + a := math.Log(sigma * sigma) 376 + 377 + deltaSquared := delta * delta 378 + phiSquared := phi * phi 379 + 380 + fFunc := func(x float64) float64 { 381 + eX := math.Exp(x) 382 + num := eX * (deltaSquared - phiSquared - v - eX) 383 + denom := 2.0 * (phiSquared + v + eX) * (phiSquared + v + eX) 384 + return num/denom - (x-a)/(glickoTau*glickoTau) 385 + } 386 + 387 + // Find bounds 388 + A := a 389 + var B float64 390 + if deltaSquared > phiSquared+v { 391 + B = math.Log(deltaSquared - phiSquared - v) 392 + } else { 393 + k := 1.0 394 + for 
fFunc(a-k*glickoTau) < 0 { 395 + k++ 396 + } 397 + B = a - k*glickoTau 302 398 } 303 - if player2TotalGames > 500 { 304 - kPlayer2 = 16 399 + 400 + // Illinois algorithm iteration 401 + fA := fFunc(A) 402 + fB := fFunc(B) 403 + 404 + for math.Abs(B-A) > glickoEpsilon { 405 + C := A + (A-B)*fA/(fB-fA) 406 + fC := fFunc(C) 407 + 408 + if fC*fB < 0 { 409 + A = B 410 + fA = fB 411 + } else { 412 + fA = fA / 2.0 413 + } 414 + 415 + B = C 416 + fB = fC 305 417 } 306 418 307 - // Expected scores 308 - expectedPlayer1 := 1.0 / (1.0 + math.Pow(10, float64(player2Rating-player1Rating)/400.0)) 309 - expectedPlayer2 := 1.0 / (1.0 + math.Pow(10, float64(player1Rating-player2Rating)/400.0)) 419 + sigmaNew := math.Exp(A / 2.0) 420 + 421 + // Step 6: Update rating deviation 422 + phiStar := math.Sqrt(phiSquared + sigmaNew*sigmaNew) 423 + 424 + // Step 7: Update rating and RD 425 + phiNew := 1.0 / math.Sqrt(1.0/(phiStar*phiStar)+1.0/v) 310 426 311 - // Actual scores (player1Score is win percentage, player2Score is 1-player1Score) 312 - player2Score := 1.0 - player1Score 427 + var muNew float64 428 + for _, result := range results { 429 + muJ, phiJ := toGlicko2Scale(result.OpponentRating, result.OpponentRD) 430 + muNew += g(phiJ) * (result.Score - eFunc(mu, muJ, phiJ)) 431 + } 432 + muNew = mu + phiNew*phiNew*muNew 313 433 314 - // Rating changes based on difference between actual and expected 315 - player1Change := int(float64(kPlayer1) * (player1Score - expectedPlayer1)) 316 - player2Change := int(float64(kPlayer2) * (player2Score - expectedPlayer2)) 434 + // Step 8: Convert back to standard scale 435 + rating, rd := fromGlicko2Scale(muNew, phiNew) 317 436 318 - return player1Change, player2Change 437 + return Glicko2Player{Rating: rating, RD: rd, Volatility: sigmaNew} 319 438 } 320 439 321 - func updateEloRatings(player1ID, player2ID, player1Wins, player2Wins int) error { 322 - // Get current ratings and match counts 323 - var player1Rating, player2Rating, player1Games, 
player2Games int 440 + func updateGlicko2Ratings(player1ID, player2ID, player1Wins, player2Wins int) error { 441 + // Get current Glicko-2 values for both players 442 + var p1Rating, p1RD, p1Vol, p2Rating, p2RD, p2Vol float64 324 443 325 - err := globalDB.QueryRow(` 326 - SELECT s.elo_rating, 327 - (SELECT COUNT(*) FROM matches m WHERE (m.player1_id = s.id OR m.player2_id = s.id) AND m.is_valid = 1) 328 - FROM submissions s WHERE s.id = ? 329 - `, player1ID).Scan(&player1Rating, &player1Games) 444 + err := globalDB.QueryRow( 445 + "SELECT glicko_rating, glicko_rd, glicko_volatility FROM submissions WHERE id = ?", 446 + player1ID, 447 + ).Scan(&p1Rating, &p1RD, &p1Vol) 330 448 if err != nil { 331 449 return err 332 450 } 333 451 334 - err = globalDB.QueryRow(` 335 - SELECT s.elo_rating, 336 - (SELECT COUNT(*) FROM matches m WHERE (m.player1_id = s.id OR m.player2_id = s.id) AND m.is_valid = 1) 337 - FROM submissions s WHERE s.id = ? 338 - `, player2ID).Scan(&player2Rating, &player2Games) 452 + err = globalDB.QueryRow( 453 + "SELECT glicko_rating, glicko_rd, glicko_volatility FROM submissions WHERE id = ?", 454 + player2ID, 455 + ).Scan(&p2Rating, &p2RD, &p2Vol) 339 456 if err != nil { 340 457 return err 341 458 } 342 459 343 - // Calculate player1's actual score (win percentage) 460 + // Calculate scores 344 461 totalGames := player1Wins + player2Wins 345 462 player1Score := float64(player1Wins) / float64(totalGames) 463 + player2Score := float64(player2Wins) / float64(totalGames) 346 464 347 - // Calculate rating changes based on actual performance 348 - player1Change, player2Change := calculateEloChange(player1Rating, player2Rating, player1Games, player2Games, player1Score) 465 + // Update player 1 466 + p1 := Glicko2Player{Rating: p1Rating, RD: p1RD, Volatility: p1Vol} 467 + p1Results := []Glicko2Result{{OpponentRating: p2Rating, OpponentRD: p2RD, Score: player1Score}} 468 + p1New := updateGlicko2(p1, p1Results) 349 469 350 - // Update ratings 351 - _, err = 
globalDB.Exec("UPDATE submissions SET elo_rating = ? WHERE id = ?", player1Rating+player1Change, player1ID) 470 + // Update player 2 471 + p2 := Glicko2Player{Rating: p2Rating, RD: p2RD, Volatility: p2Vol} 472 + p2Results := []Glicko2Result{{OpponentRating: p1Rating, OpponentRD: p1RD, Score: player2Score}} 473 + p2New := updateGlicko2(p2, p2Results) 474 + 475 + // Save updated ratings 476 + _, err = globalDB.Exec( 477 + "UPDATE submissions SET glicko_rating = ?, glicko_rd = ?, glicko_volatility = ? WHERE id = ?", 478 + p1New.Rating, p1New.RD, p1New.Volatility, player1ID, 479 + ) 352 480 if err != nil { 353 481 return err 354 482 } 355 483 356 - _, err = globalDB.Exec("UPDATE submissions SET elo_rating = ? WHERE id = ?", player2Rating+player2Change, player2ID) 484 + _, err = globalDB.Exec( 485 + "UPDATE submissions SET glicko_rating = ?, glicko_rd = ?, glicko_volatility = ? WHERE id = ?", 486 + p2New.Rating, p2New.RD, p2New.Volatility, player2ID, 487 + ) 357 488 return err 358 489 } 359 490
+6 -5
model.go
··· 189 189 b.WriteString(lipgloss.NewStyle().Bold(true).Render("🏆 Leaderboard") + "\n\n") 190 190 191 191 // Header without styling on the whole line 192 - b.WriteString(fmt.Sprintf("%-4s %-20s %6s %8s %8s %10s %10s\n", 193 - "Rank", "User", "ELO", "Wins", "Losses", "Win Rate", "Avg Moves")) 192 + b.WriteString(fmt.Sprintf("%-4s %-20s %11s %8s %8s %10s %10s\n", 193 + "Rank", "User", "Rating", "Wins", "Losses", "Win Rate", "Avg Moves")) 194 194 195 195 for i, entry := range entries { 196 196 rank := fmt.Sprintf("#%d", i+1) ··· 207 207 coloredRank = rank 208 208 } 209 209 210 - // Format line with proper spacing 211 - b.WriteString(fmt.Sprintf("%-4s %-20s %6d %8d %8d %9.2f%% %9.1f\n", 212 - coloredRank, entry.Username, entry.Elo, entry.Wins, entry.Losses, entry.WinPct, entry.AvgMoves)) 210 + // Format line with Glicko-2 rating ± RD 211 + ratingStr := fmt.Sprintf("%d±%d", entry.Rating, entry.RD) 212 + b.WriteString(fmt.Sprintf("%-4s %-20s %11s %8d %8d %9.2f%% %9.1f\n", 213 + coloredRank, entry.Username, ratingStr, entry.Wins, entry.Losses, entry.WinPct, entry.AvgMoves)) 213 214 } 214 215 215 216 return b.String()
+3 -3
runner.go
··· 207 207 if err := addMatch(newSub.ID, opponent.ID, winnerID, player1Wins, player2Wins, avgMoves, avgMoves); err != nil { 208 208 log.Printf("Failed to store match result: %v", err) 209 209 } else { 210 - // Update ELO ratings based on actual win percentages 211 - if err := updateEloRatings(newSub.ID, opponent.ID, player1Wins, player2Wins); err != nil { 212 - log.Printf("ELO update failed: %v", err) 210 + // Update Glicko-2 ratings based on actual win percentages 211 + if err := updateGlicko2Ratings(newSub.ID, opponent.ID, player1Wins, player2Wins); err != nil { 212 + log.Printf("Glicko-2 update failed: %v", err) 213 213 } 214 214 215 215 NotifyLeaderboardUpdate()
+3 -3
web.go
··· 313 313 return '<tr>' + 314 314 '<td class="rank rank-' + rank + '">' + medal + '</td>' + 315 315 '<td class="player-name">' + e.Username + '</td>' + 316 - '<td><strong>' + e.Elo + '</strong></td>' + 316 + '<td><strong>' + e.Rating + '</strong> <span style="color: #94a3b8; font-size: 0.85em;">±' + e.RD + '</span></td>' + 317 317 '<td>' + e.Wins.toLocaleString() + '</td>' + 318 318 '<td>' + e.Losses.toLocaleString() + '</td>' + 319 319 '<td><span class="win-rate ' + winRateClass + '">' + winRate + '%</span></td>' + ··· 366 366 <tr> 367 367 <th>Rank</th> 368 368 <th>Player</th> 369 - <th>ELO</th> 369 + <th>Rating</th> 370 370 <th>Wins</th> 371 371 <th>Losses</th> 372 372 <th>Win Rate</th> ··· 380 380 <tr> 381 381 <td class="rank rank-{{add $i 1}}">{{if lt $i 3}}{{medal $i}}{{else}}{{add $i 1}}{{end}}</td> 382 382 <td class="player-name">{{$e.Username}}</td> 383 - <td><strong>{{$e.Elo}}</strong></td> 383 + <td><strong>{{$e.Rating}}</strong> <span style="color: #94a3b8; font-size: 0.85em;">±{{$e.RD}}</span></td> 384 384 <td>{{$e.Wins}}</td> 385 385 <td>{{$e.Losses}}</td> 386 386 <td><span class="win-rate {{winRateClass $e}}">{{winRate $e}}%</span></td>