A container registry that uses the AT Protocol for manifest storage and S3 for blob storage.

clean up old docs; add quotas prelim spec

+1289 −1901

docs/API_KEY_MIGRATION.md (deleted, −826 lines)
# API Key Migration Plan

## Overview

Replace the session token system (used only by the credential helper) with API keys that link to OAuth sessions. This simplifies authentication while maintaining all use cases.

## Current State

### Three Separate Auth Systems

1. **Session Tokens** (`pkg/auth/session/`)
   - JWT-like tokens: `<base64_claims>.<base64_signature>`
   - Created after OAuth callback, shown to the user to copy
   - User manually pastes into credential helper config
   - Validated in `/auth/token` and `/auth/exchange`
   - 30-day TTL
   - **Problem:** Awkward UX, requires manual copy/paste

2. **UI Sessions** (`pkg/appview/session/`)
   - Cookie-based (`atcr_session`)
   - Random session ID, server-side store
   - 24-hour TTL
   - **Keep this - works well**

3. **App Password Auth** (via PDS)
   - Direct `com.atproto.server.createSession` call
   - No AppView involvement until token request
   - **Keep this - essential for non-UI users**

## Target State

### Two Auth Methods

1. **API Keys** (NEW - replaces session tokens)
   - Generated in the UI after OAuth login
   - Format: `atcr_<32_bytes_base64>`
   - Linked to a server-side OAuth refresh token
   - Multiple keys per user (laptop, CI/CD, etc.)
   - Revocable without re-auth

2. **App Passwords** (KEEP)
   - Direct PDS authentication
   - Works without UI/OAuth

### UI Sessions (UNCHANGED)

- Cookie-based for web UI
- Separate system, no changes needed

---

## Implementation Plan

### Phase 1: API Key System

#### 1.1 Create API Key Store (`pkg/appview/apikey/store.go`)

```go
package apikey

import (
	"crypto/rand"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"os"
	"sync"
	"time"

	"golang.org/x/crypto/bcrypt"
)

// APIKey represents a user's API key
type APIKey struct {
	ID        string    `json:"id"`       // UUID
	KeyHash   string    `json:"key_hash"` // bcrypt hash
	DID       string    `json:"did"`      // Owner's DID
	Handle    string    `json:"handle"`   // Owner's handle
	Name      string    `json:"name"`     // User-provided name
	CreatedAt time.Time `json:"created_at"`
	LastUsed  time.Time `json:"last_used"`
}

// Store manages API keys
type Store struct {
	mu       sync.RWMutex
	keys     map[string]*APIKey  // keyHash -> APIKey
	byDID    map[string][]string // DID -> []keyHash
	filePath string              // /var/lib/atcr/api-keys.json
}

// NewStore creates a new API key store
func NewStore(filePath string) (*Store, error)

// Generate creates a new API key and returns the plaintext key (shown once)
func (s *Store) Generate(did, handle, name string) (key string, keyID string, err error)

// Validate checks if an API key is valid and returns the associated data
func (s *Store) Validate(key string) (*APIKey, error)

// List returns all API keys for a DID (without plaintext keys)
func (s *Store) List(did string) []*APIKey

// Delete removes an API key
func (s *Store) Delete(did, keyID string) error

// UpdateLastUsed updates the last used timestamp
func (s *Store) UpdateLastUsed(keyHash string) error
```

**Key Generation:**
```go
func (s *Store) Generate(did, handle, name string) (string, string, error) {
	// Generate 32 random bytes
	b := make([]byte, 32)
	if _, err := rand.Read(b); err != nil {
		return "", "", err
	}

	// Format: atcr_<base64>
	key := "atcr_" + base64.RawURLEncoding.EncodeToString(b)

	// Hash for storage
	keyHash, err := bcrypt.GenerateFromPassword([]byte(key), bcrypt.DefaultCost)
	if err != nil {
		return "", "", err
	}

	// Generate ID
	keyID := generateUUID()

	apiKey := &APIKey{
		ID:        keyID,
		KeyHash:   string(keyHash),
		DID:       did,
		Handle:    handle,
		Name:      name,
		CreatedAt: time.Now(),
		LastUsed:  time.Time{}, // Never used yet
	}

	s.mu.Lock()
	s.keys[string(keyHash)] = apiKey
	s.byDID[did] = append(s.byDID[did], string(keyHash))
	s.mu.Unlock()

	s.save()

	// Return plaintext key (only time it's available)
	return key, keyID, nil
}
```

**Key Validation:**
```go
func (s *Store) Validate(key string) (*APIKey, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Try to match against all stored hashes
	// (one bcrypt comparison per stored key; acceptable at small scale)
	for hash, apiKey := range s.keys {
		if err := bcrypt.CompareHashAndPassword([]byte(hash), []byte(key)); err == nil {
			// Update last used asynchronously
			go s.UpdateLastUsed(hash)
			return apiKey, nil
		}
	}

	return nil, fmt.Errorf("invalid API key")
}
```

#### 1.2 Add API Key Handlers (`pkg/appview/handlers/apikeys.go`)

```go
package handlers

import (
	"encoding/json"
	"net/http"

	"github.com/gorilla/mux"

	"atcr.io/pkg/appview/apikey"
	"atcr.io/pkg/appview/middleware"
)

// GenerateAPIKeyHandler handles POST /api/keys
type GenerateAPIKeyHandler struct {
	Store *apikey.Store
}

func (h *GenerateAPIKeyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	user := middleware.GetUser(r)
	if user == nil {
		http.Error(w, "Unauthorized", http.StatusUnauthorized)
		return
	}

	name := r.FormValue("name")
	if name == "" {
		name = "Unnamed Key"
	}

	key, keyID, err := h.Store.Generate(user.DID, user.Handle, name)
	if err != nil {
		http.Error(w, "Failed to generate key", http.StatusInternalServerError)
		return
	}

	// Return key (shown once!)
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{
		"id":  keyID,
		"key": key,
	})
}

// ListAPIKeysHandler handles GET /api/keys
type ListAPIKeysHandler struct {
	Store *apikey.Store
}

func (h *ListAPIKeysHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	user := middleware.GetUser(r)
	if user == nil {
		http.Error(w, "Unauthorized", http.StatusUnauthorized)
		return
	}

	keys := h.Store.List(user.DID)

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(keys)
}

// DeleteAPIKeyHandler handles DELETE /api/keys/{id}
type DeleteAPIKeyHandler struct {
	Store *apikey.Store
}

func (h *DeleteAPIKeyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	user := middleware.GetUser(r)
	if user == nil {
		http.Error(w, "Unauthorized", http.StatusUnauthorized)
		return
	}

	vars := mux.Vars(r)
	keyID := vars["id"]

	if err := h.Store.Delete(user.DID, keyID); err != nil {
		http.Error(w, "Failed to delete key", http.StatusInternalServerError)
		return
	}

	w.WriteHeader(http.StatusNoContent)
}
```

### Phase 2: Update Token Handler

#### 2.1 Modify `/auth/token` Handler (`pkg/auth/token/handler.go`)

```go
type Handler struct {
	issuer              *Issuer
	validator           *atproto.SessionValidator
	apiKeyStore         *apikey.Store // NEW
	defaultHoldEndpoint string
}

func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	username, password, ok := r.BasicAuth()
	if !ok {
		return unauthorized
	}

	var did, handle, accessToken string
	var err error

	// 1. Check if it's an API key (NEW)
	if strings.HasPrefix(password, "atcr_") {
		apiKey, err := h.apiKeyStore.Validate(password)
		if err != nil {
			fmt.Printf("DEBUG [token/handler]: API key validation failed: %v\n", err)
			return unauthorized
		}

		did = apiKey.DID
		handle = apiKey.Handle
		fmt.Printf("DEBUG [token/handler]: API key validated for DID=%s, handle=%s\n", did, handle)

		// The API key is linked to an OAuth session.
		// The OAuth refresher will provide an access token when needed via middleware.
	} else {
		// 2. Try app password (direct PDS)
		did, handle, accessToken, err = h.validator.CreateSessionAndGetToken(r.Context(), username, password)
		if err != nil {
			fmt.Printf("DEBUG [token/handler]: App password validation failed: %v\n", err)
			return unauthorized
		}

		fmt.Printf("DEBUG [token/handler]: App password validated, DID=%s\n", did)

		// Cache access token for manifest operations
		auth.GetGlobalTokenCache().Set(did, accessToken, 2*time.Hour)

		// Ensure profile exists
		// ... existing code ...
	}

	// Rest of handler: validate access, issue JWT, etc.
	// ... existing code ...
}
```

**Key Changes:**
- Remove session token validation (`sessionManager.Validate()`)
- Add the API key check as the first priority
- Keep app password as the fallback
- API keys use the OAuth refresher (server-side); app passwords use the token cache (client-side)

#### 2.2 Remove `/auth/exchange` Endpoint

The `/auth/exchange` endpoint was only used for exchanging session tokens for registry JWTs. With API keys, it is no longer needed.

**Files to delete:**
- `pkg/auth/exchange/handler.go`

**Files to update:**
- `cmd/appview/serve.go` - Remove exchange handler registration

### Phase 3: Update UI

#### 3.1 Add API Keys Section to Settings Page

**Template** (`pkg/appview/templates/settings.html`):

```html
<!-- Add after existing profile settings -->
<section class="api-keys">
  <h2>API Keys</h2>
  <p>Generate API keys for Docker CLI and CI/CD. Each key is linked to your OAuth session.</p>

  <!-- Generate New Key -->
  <div class="generate-key">
    <h3>Generate New API Key</h3>
    <form id="generate-key-form">
      <input type="text" id="key-name" placeholder="Key name (e.g., My Laptop)" required>
      <button type="submit">Generate Key</button>
    </form>
  </div>

  <!-- Key Generated Modal (shown once) -->
  <div id="key-modal" class="modal hidden">
    <div class="modal-content">
      <h3>✓ API Key Generated!</h3>
      <p><strong>Copy this key now - it won't be shown again:</strong></p>
      <div class="key-display">
        <code id="generated-key"></code>
        <button onclick="copyKey()">Copy to Clipboard</button>
      </div>
      <div class="usage-instructions">
        <h4>Using with Docker:</h4>
        <pre>docker login atcr.io -u <span class="handle">{{.Profile.Handle}}</span> -p <span class="key-placeholder">[paste key here]</span></pre>
      </div>
      <button onclick="closeModal()">Done</button>
    </div>
  </div>

  <!-- Existing Keys List -->
  <div class="keys-list">
    <h3>Your API Keys</h3>
    <table>
      <thead>
        <tr>
          <th>Name</th>
          <th>Created</th>
          <th>Last Used</th>
          <th>Actions</th>
        </tr>
      </thead>
      <tbody id="keys-table">
        <!-- Populated via JavaScript -->
      </tbody>
    </table>
  </div>
</section>

<script>
// Generate key
document.getElementById('generate-key-form').addEventListener('submit', async (e) => {
  e.preventDefault();
  const name = document.getElementById('key-name').value;

  const resp = await fetch('/api/keys', {
    method: 'POST',
    headers: {'Content-Type': 'application/x-www-form-urlencoded'},
    body: `name=${encodeURIComponent(name)}`
  });

  const data = await resp.json();

  // Show key in modal (only time it's available)
  document.getElementById('generated-key').textContent = data.key;
  document.getElementById('key-modal').classList.remove('hidden');

  // Refresh keys list
  loadKeys();
});

// Copy key to clipboard
function copyKey() {
  const key = document.getElementById('generated-key').textContent;
  navigator.clipboard.writeText(key);
  alert('Copied to clipboard!');
}

// Load existing keys
async function loadKeys() {
  const resp = await fetch('/api/keys');
  const keys = await resp.json();

  const tbody = document.getElementById('keys-table');
  // NOTE: escape key.name before interpolating if names may contain HTML
  tbody.innerHTML = keys.map(key => `
    <tr>
      <td>${key.name}</td>
      <td>${new Date(key.created_at).toLocaleDateString()}</td>
      <td>${key.last_used ? new Date(key.last_used).toLocaleDateString() : 'Never'}</td>
      <td><button onclick="deleteKey('${key.id}')">Revoke</button></td>
    </tr>
  `).join('');
}

// Delete key
async function deleteKey(id) {
  if (!confirm('Are you sure you want to revoke this key?')) return;

  await fetch(`/api/keys/${id}`, { method: 'DELETE' });
  loadKeys();
}

// Load keys on page load
loadKeys();
</script>

<style>
.modal.hidden { display: none; }
.modal {
  position: fixed;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  background: rgba(0,0,0,0.5);
  display: flex;
  align-items: center;
  justify-content: center;
}
.modal-content {
  background: white;
  padding: 2rem;
  border-radius: 8px;
  max-width: 600px;
}
.key-display {
  background: #f5f5f5;
  padding: 1rem;
  margin: 1rem 0;
  border-radius: 4px;
}
.key-display code {
  word-break: break-all;
  font-size: 14px;
}
.usage-instructions {
  margin-top: 1rem;
  padding: 1rem;
  background: #e3f2fd;
  border-radius: 4px;
}
.usage-instructions pre {
  background: #263238;
  color: #aed581;
  padding: 1rem;
  border-radius: 4px;
  overflow-x: auto;
}
.handle { color: #ffab40; }
.key-placeholder { color: #64b5f6; }
</style>
```

#### 3.2 Register API Key Routes (`cmd/appview/serve.go`)

```go
// In initializeUI() function, add:

// API key management routes (authenticated)
authRouter.Handle("/api/keys", &uihandlers.GenerateAPIKeyHandler{
	Store: apiKeyStore,
}).Methods("POST")

authRouter.Handle("/api/keys", &uihandlers.ListAPIKeysHandler{
	Store: apiKeyStore,
}).Methods("GET")

authRouter.Handle("/api/keys/{id}", &uihandlers.DeleteAPIKeyHandler{
	Store: apiKeyStore,
}).Methods("DELETE")
```

### Phase 4: Update Credential Helper

#### 4.1 Simplify Configuration (`cmd/credential-helper/main.go`)

```go
// SessionStore becomes CredentialStore
type CredentialStore struct {
	Handle     string `json:"handle"`
	APIKey     string `json:"api_key"`
	AppViewURL string `json:"appview_url"`
}

func handleConfigure(handle string) {
	fmt.Println("ATCR Credential Helper Configuration")
	fmt.Println("=====================================")
	fmt.Println()
	fmt.Println("You need an API key from the ATCR web UI.")
	fmt.Println()

	appViewURL := os.Getenv("ATCR_APPVIEW_URL")
	if appViewURL == "" {
		appViewURL = defaultAppViewURL
	}

	// Auto-open settings page
	settingsURL := appViewURL + "/settings"
	fmt.Printf("Opening settings page: %s\n", settingsURL)
	fmt.Println("Log in and generate an API key if you haven't already.")
	fmt.Println()

	if err := oauth.OpenBrowser(settingsURL); err != nil {
		fmt.Printf("Could not open browser. Please visit: %s\n\n", settingsURL)
	}

	// Prompt for credentials
	if handle == "" {
		fmt.Print("Enter your ATProto handle (e.g., alice.bsky.social): ")
		fmt.Scanln(&handle)
	} else {
		fmt.Printf("Using handle: %s\n", handle)
	}

	fmt.Print("Enter your API key (from settings page): ")
	var apiKey string
	fmt.Scanln(&apiKey)

	// Validate key format
	if !strings.HasPrefix(apiKey, "atcr_") {
		fmt.Fprintf(os.Stderr, "Invalid API key format. Key should start with 'atcr_'\n")
		os.Exit(1)
	}

	// Save credentials
	creds := &CredentialStore{
		Handle:     handle,
		APIKey:     apiKey,
		AppViewURL: appViewURL,
	}

	if err := saveCredentials(getCredentialsPath(), creds); err != nil {
		fmt.Fprintf(os.Stderr, "Error saving credentials: %v\n", err)
		os.Exit(1)
	}

	fmt.Println()
	fmt.Println("✓ Configuration complete!")
	fmt.Println("You can now use docker push/pull with atcr.io")
}

func handleGet() {
	var serverURL string
	fmt.Fscanln(os.Stdin, &serverURL)

	// Load credentials
	creds, err := loadCredentials(getCredentialsPath())
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error loading credentials: %v\n", err)
		fmt.Fprintf(os.Stderr, "Please run: docker-credential-atcr configure\n")
		os.Exit(1)
	}

	// Return credentials for Docker
	// Docker will send these as Basic Auth to /auth/token
	response := Credentials{
		ServerURL: serverURL,
		Username:  creds.Handle,
		Secret:    creds.APIKey, // API key as password
	}

	json.NewEncoder(os.Stdout).Encode(response)
}
```

**File Rename:**
- `~/.atcr/session.json` → `~/.atcr/credentials.json`

### Phase 5: Remove Session Token System

#### 5.1 Delete Session Token Files

**Files to delete:**
- `pkg/auth/session/handler.go`
- `pkg/auth/exchange/handler.go`

#### 5.2 Update OAuth Server (`pkg/auth/oauth/server.go`)

**Remove session token creation:**
```go
// OLD (delete this):
sessionToken, err := s.sessionManager.Create(did, handle)
if err != nil {
	s.renderError(w, fmt.Sprintf("Failed to create session token: %v", err))
	return
}

// Check if this is a UI login...
if cookie, err := r.Cookie("oauth_return_to"); err == nil && s.uiSessionStore != nil {
	// UI flow...
} else {
	// Render success page with session token (for credential helper)
	s.renderSuccess(w, sessionToken, handle)
}
```

**NEW (replace with):**
```go
// Check if this is a UI login
if cookie, err := r.Cookie("oauth_return_to"); err == nil && s.uiSessionStore != nil {
	// Create UI session
	uiSessionID, err := s.uiSessionStore.Create(did, handle, sessionData.HostURL, 24*time.Hour)
	// ... set cookie, redirect ...
} else {
	// Non-UI flow: redirect to settings to get an API key
	s.renderRedirectToSettings(w, handle)
}
```

**Add redirect to settings template:**
```go
func (s *Server) renderRedirectToSettings(w http.ResponseWriter, handle string) {
	tmpl := template.Must(template.New("redirect").Parse(`
<!DOCTYPE html>
<html>
<head>
	<title>Authorization Successful - ATCR</title>
	<meta http-equiv="refresh" content="3;url=/settings">
</head>
<body>
	<h1>✓ Authorization Successful!</h1>
	<p>Redirecting to settings page to generate your API key...</p>
	<p>If not redirected, <a href="/settings">click here</a>.</p>
</body>
</html>
`))
	w.Header().Set("Content-Type", "text/html")
	tmpl.Execute(w, nil)
}
```

#### 5.3 Update Server Constructor

```go
// Remove sessionManager parameter
func NewServer(app *App) *Server {
	return &Server{
		app: app,
	}
}
```

#### 5.4 Update Registry Initialization (`cmd/appview/serve.go`)

```go
// REMOVE session manager creation:
// sessionManager, err := session.NewManagerWithPersistentSecret(secretPath, 30*24*time.Hour)

// Create API key store
apiKeyStorePath := filepath.Join(filepath.Dir(storagePath), "api-keys.json")
apiKeyStore, err := apikey.NewStore(apiKeyStorePath)
if err != nil {
	return fmt.Errorf("failed to create API key store: %w", err)
}

// OAuth server doesn't need the session manager anymore
oauthServer := oauth.NewServer(oauthApp)
oauthServer.SetRefresher(refresher)
if uiSessionStore != nil {
	oauthServer.SetUISessionStore(uiSessionStore)
}

// Token handler gets the API key store instead of the session manager
if issuer != nil {
	tokenHandler := token.NewHandler(issuer, apiKeyStore, defaultHoldEndpoint)
	tokenHandler.RegisterRoutes(mux)

	// Remove exchange handler registration (no longer needed)
}
```

---

## Migration Path

### For Existing Users

**Option 1: Smooth Migration (Recommended)**
1. Keep session token validation temporarily, with a deprecation warning
2. When a session token is used, log a warning and return a special response header (see the sketch after this list)
3. Docker client shows warning: "Session tokens deprecated, please regenerate API key"
4. Remove session token support in the next major version
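A minimal sketch of that temporary bridge, assuming the legacy `sessionManager` is kept alive for the migration window; the `claims` fields and header text are illustrative, and whether a given Docker client surfaces RFC 7234 `Warning` headers is best-effort, not guaranteed:

```go
// Hypothetical legacy-token bridge inside ServeHTTP, tried before returning
// unauthorized. Legacy session tokens look like <base64_claims>.<base64_signature>.
if !strings.HasPrefix(password, "atcr_") && strings.Count(password, ".") == 1 {
	claims, err := h.sessionManager.Validate(password) // legacy path, deleted in Phase 5
	if err == nil {
		log.Printf("WARN [token/handler]: deprecated session token used by DID=%s", claims.DID)
		// Best-effort deprecation signal for clients that display Warning headers.
		w.Header().Add("Warning", `299 atcr.io "Session tokens are deprecated; generate an API key in Settings"`)
		did, handle = claims.DID, claims.Handle
	}
}
```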
**Option 2: Hard Cutover**
1. Deploy the new version with API keys
2. Session tokens stop working immediately
3. Users must reconfigure: `docker-credential-atcr configure`
4. Cleaner but disruptive

### Rollout Plan

**Week 1: Deploy API Keys**
- Add API key system
- Keep session token validation
- Add deprecation notice to OAuth callback

**Week 2-4: Migration Period**
- Monitor API key adoption
- Email users about migration
- Provide migration guide

**Week 5: Remove Session Tokens**
- Delete session token code
- Force users to API keys

---

## Testing Plan

### Unit Tests

1. **API Key Store** (see the sketch after this list)
   - Test key generation (format, uniqueness)
   - Test key validation (correct/incorrect keys)
   - Test bcrypt hashing
   - Test key listing/deletion

2. **Token Handler**
   - Test API key authentication
   - Test app password authentication
   - Test invalid credentials
   - Test key format validation
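A possible shape for the store tests, assuming the `Store` API declared in Phase 1.1 (`NewStore`, `Generate`, `Validate`, `List`, `Delete`); test names and values are illustrative:

```go
package apikey

import (
	"path/filepath"
	"strings"
	"testing"
)

func TestGenerateValidateDelete(t *testing.T) {
	store, err := NewStore(filepath.Join(t.TempDir(), "api-keys.json"))
	if err != nil {
		t.Fatalf("NewStore: %v", err)
	}

	key, keyID, err := store.Generate("did:plc:alice123", "alice.test", "laptop")
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}
	if !strings.HasPrefix(key, "atcr_") {
		t.Errorf("key %q does not have the atcr_ prefix", key)
	}

	// The correct key resolves to the owning DID
	apiKey, err := store.Validate(key)
	if err != nil || apiKey.DID != "did:plc:alice123" {
		t.Fatalf("Validate: got %+v, err=%v", apiKey, err)
	}

	// A wrong key is rejected
	if _, err := store.Validate("atcr_not-a-real-key"); err == nil {
		t.Error("expected an invalid key to be rejected")
	}

	// Listing and revocation
	if keys := store.List("did:plc:alice123"); len(keys) != 1 {
		t.Fatalf("List: expected 1 key, got %d", len(keys))
	}
	if err := store.Delete("did:plc:alice123", keyID); err != nil {
		t.Fatalf("Delete: %v", err)
	}
	if _, err := store.Validate(key); err == nil {
		t.Error("expected a revoked key to be rejected")
	}
}
```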
### Integration Tests

1. **Full Auth Flow**
   - UI login → OAuth → API key generation
   - Credential helper → API key → registry JWT
   - App password → registry JWT

2. **Docker Client Tests**
   - `docker login -u handle -p api_key`
   - `docker login -u handle -p app_password`
   - `docker push` with API key
   - `docker pull` with API key

### Security Tests

1. **Key Security**
   - Verify bcrypt hashing (not plaintext storage)
   - Test key shown only once
   - Test key revocation
   - Test unauthorized key access

2. **OAuth Security**
   - Verify API key links to correct OAuth session
   - Test expired refresh token handling
   - Test multiple keys for same user

---

## Files Changed

### New Files
- `pkg/appview/apikey/store.go` - API key storage and validation
- `pkg/appview/handlers/apikeys.go` - API key HTTP handlers
- `docs/API_KEY_MIGRATION.md` - This document

### Modified Files
- `pkg/auth/token/handler.go` - Add API key validation, remove session token
- `pkg/auth/oauth/server.go` - Remove session token creation, redirect to settings
- `pkg/appview/handlers/settings.go` - Add API key management UI
- `pkg/appview/templates/settings.html` - Add API key section
- `cmd/credential-helper/main.go` - Simplify to use API keys
- `cmd/appview/serve.go` - Initialize API key store, remove session manager

### Deleted Files
- `pkg/auth/session/handler.go` - Session token system
- `pkg/auth/exchange/handler.go` - Exchange endpoint (no longer needed)

---

## Advantages

✅ **Simpler Auth:** Two methods instead of three (API keys + app passwords)
✅ **Better UX:** No manual copy/paste of session tokens
✅ **Multiple Keys:** Users can have a laptop key, CI key, etc.
✅ **Revocable:** Revoke individual keys without re-auth
✅ **Server-Side OAuth:** Refresh tokens stay on the server, not in client files
✅ **Familiar Pattern:** Matches AWS ECR, GitHub tokens, etc.

## Backward Compatibility

⚠️ **Breaking Change:** Session tokens will stop working
✅ **App passwords:** Still work (no changes)
✅ **UI sessions:** Still work (separate system)

**Migration Required:** Users with session tokens must run `docker-credential-atcr configure` again to get API keys.
---

docs/OAUTH.md (deleted, −281 lines)
# ATCR OAuth Implementation

## Overview

ATCR now supports ATProto OAuth authentication via Docker credential helpers. This allows users to authenticate with their ATProto identity (Bluesky account) and use Docker push/pull commands seamlessly.

## Architecture

### Components

1. **OAuth Client** (`pkg/auth/oauth/`)
   - Full ATProto OAuth implementation with DPoP support
   - Uses `authelia.com/client/oauth2` for OAuth + PAR
   - Uses `github.com/AxisCommunications/go-dpop` for DPoP proof generation
   - Automatic authorization server discovery
   - PKCE support for security

2. **Credential Helper** (`cmd/credential-helper/`)
   - Standalone binary: `docker-credential-atcr`
   - Implements the Docker credential helper protocol
   - Manages the OAuth flow with the browser
   - Stores tokens securely in `~/.atcr/oauth-token.json`

3. **Registry Integration**
   - `/auth/exchange` endpoint exchanges OAuth tokens for registry JWTs
   - Existing `/auth/token` endpoint for standard Docker auth

## Dependencies

- `authelia.com/client/oauth2` - OAuth client with PAR support (2⭐, Authelia-backed)
- `github.com/AxisCommunications/go-dpop` - DPoP implementation (10⭐, RFC 9449 compliant)
- `github.com/golang-jwt/jwt/v5` - JWT library (transitive, 11k+⭐)

## Usage

### Setup

1. Build the credential helper:
   ```bash
   go build -o docker-credential-atcr ./cmd/credential-helper
   ```

2. Install it in your PATH:
   ```bash
   sudo mv docker-credential-atcr /usr/local/bin/
   ```

3. Configure Docker to use it by editing `~/.docker/config.json`:
   ```json
   {
     "credsStore": "atcr"
   }
   ```

### Configuration

Run the OAuth flow:
```bash
docker-credential-atcr configure
```

This will:
1. Prompt for your ATProto handle (e.g., `alice.bsky.social`)
2. Open your browser for OAuth authorization
3. Store the OAuth token and DPoP key in `~/.atcr/oauth-token.json`

### Using with Docker

Once configured, use Docker normally:

```bash
# Push an image
docker push atcr.io/alice/myapp:latest

# Pull an image
docker pull atcr.io/alice/myapp:latest
```

The credential helper automatically:
1. Loads your stored OAuth token
2. Refreshes it if expired
3. Exchanges it for a registry JWT
4. Provides the JWT to Docker

## How It Works

### OAuth Flow

1. **User runs** `docker-credential-atcr configure`
2. **Resolve identity**: alice.bsky.social → DID → PDS endpoint
3. **Discover auth server**: GET `{pds}/.well-known/oauth-authorization-server`
4. **Generate DPoP key**: ECDSA P-256 key pair
5. **PAR request**: POST to PAR endpoint with DPoP header + PKCE challenge
6. **Open browser**: User authorizes on their PDS
7. **Receive code**: Callback to `localhost:8888/callback`
8. **Exchange code**: POST to token endpoint with DPoP header + PKCE verifier
9. **Save tokens**: Store OAuth token + DPoP key + DID/handle
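Step 3 is a plain metadata fetch; a minimal sketch of its shape, assuming the RFC 8414 field names served by the PDS (the helper name and struct are illustrative, not the actual `discovery.go` API):

```go
package oauth

import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
)

// authServerMetadata holds the subset of RFC 8414 fields the client needs.
type authServerMetadata struct {
	Issuer                             string `json:"issuer"`
	AuthorizationEndpoint              string `json:"authorization_endpoint"`
	TokenEndpoint                      string `json:"token_endpoint"`
	PushedAuthorizationRequestEndpoint string `json:"pushed_authorization_request_endpoint"`
}

// discoverAuthServer fetches {pds}/.well-known/oauth-authorization-server.
func discoverAuthServer(ctx context.Context, pdsEndpoint string) (*authServerMetadata, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet,
		pdsEndpoint+"/.well-known/oauth-authorization-server", nil)
	if err != nil {
		return nil, err
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("discovery failed: %s", resp.Status)
	}

	var meta authServerMetadata
	if err := json.NewDecoder(resp.Body).Decode(&meta); err != nil {
		return nil, err
	}
	return &meta, nil
}
```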
### Docker Push/Pull Flow

1. **Docker needs credentials** for `atcr.io`
2. **Calls credential helper**: `docker-credential-atcr get`
3. **Helper loads token** from `~/.atcr/oauth-token.json`
4. **Refresh if needed**: Uses refresh token + DPoP if expired
5. **Exchange for registry JWT**: POST to `/auth/exchange` with OAuth token + handle
6. **Registry validates token**: Calls `getSession` on PDS to validate token
7. **Registry issues JWT**: Creates registry JWT with validated DID/handle
8. **Return to Docker**: `{"Username": "oauth2", "Secret": "<jwt>"}`
9. **Docker uses JWT**: For authentication to registry API

## Security

### DPoP (Demonstrating Proof-of-Possession)

Every OAuth request includes a DPoP proof:
- Unique JWT signed with ECDSA private key
- Contains HTTP method, URL, timestamp, nonce
- Public key (JWK) included in JWT header
- Binds the token to the specific client
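For intuition, a proof per RFC 9449 looks roughly like the sketch below, written against `github.com/golang-jwt/jwt/v5` (a dependency listed above); the real client delegates proof generation to `go-dpop`, so treat this as illustrative of a proof's contents, not the project's code path:

```go
package oauth

import (
	"crypto/ecdsa"
	"crypto/rand"
	"encoding/base64"
	"encoding/hex"
	"time"

	"github.com/golang-jwt/jwt/v5"
)

// dpopProof builds a single-use DPoP proof JWT bound to one HTTP request.
func dpopProof(key *ecdsa.PrivateKey, method, targetURL, nonce string) (string, error) {
	// Unique proof ID (jti)
	b := make([]byte, 16)
	if _, err := rand.Read(b); err != nil {
		return "", err
	}

	claims := jwt.MapClaims{
		"htm": method,            // HTTP method the proof is bound to
		"htu": targetURL,         // target URL, without query or fragment
		"iat": time.Now().Unix(), // issued-at
		"jti": hex.EncodeToString(b),
	}
	if nonce != "" {
		claims["nonce"] = nonce // echo the server-supplied nonce when present
	}

	// Public key travels in the header as a JWK (RFC 7517, P-256 point).
	xb, yb := make([]byte, 32), make([]byte, 32)
	key.PublicKey.X.FillBytes(xb)
	key.PublicKey.Y.FillBytes(yb)

	tok := jwt.NewWithClaims(jwt.SigningMethodES256, claims)
	tok.Header["typ"] = "dpop+jwt"
	tok.Header["jwk"] = map[string]string{
		"kty": "EC",
		"crv": "P-256",
		"x":   base64.RawURLEncoding.EncodeToString(xb),
		"y":   base64.RawURLEncoding.EncodeToString(yb),
	}

	return tok.SignedString(key)
}
```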
### PKCE (Proof Key for Code Exchange)

- Code verifier generated locally
- Code challenge sent in authorization request
- Verifier sent in token exchange
- Prevents authorization code interception

### Token Storage

- Tokens stored in `~/.atcr/oauth-token.json`
- File permissions: 0600 (owner read/write only)
- DPoP key stored in PEM format
- Refresh tokens for long-term access

## Implementation Details

### Code Structure

```
pkg/auth/oauth/
├── client.go      # OAuth client with DPoP
├── discovery.go   # Authorization server discovery
├── metadata.go    # Client metadata document
├── storage.go     # Token persistence
└── transport.go   # DPoP HTTP transport

pkg/auth/atproto/
├── session.go     # ATProto session validation (Basic auth)
└── validator.go   # OAuth token validation via getSession

cmd/credential-helper/
├── main.go        # Docker credential helper protocol
├── oauth.go       # OAuth flow orchestration
└── token.go       # Token management

pkg/auth/exchange/
└── handler.go     # OAuth → Registry JWT exchange
```

### Key Classes

**OAuth Client** (`pkg/auth/oauth/client.go`)
- `NewClient()` - Create client with DPoP key
- `InitializeForHandle()` - Discover auth server
- `AuthorizeURL()` - Generate authorization URL with PAR + PKCE
- `Exchange()` - Exchange code for token with DPoP
- `RefreshToken()` - Refresh expired token with DPoP

**DPoP Transport** (`pkg/auth/oauth/transport.go`)
- Implements `http.RoundTripper`
- Automatically adds DPoP header to all requests
- Handles nonce management and retries
- Used by OAuth client for all HTTP requests

**Token Store** (`pkg/auth/oauth/storage.go`)
- Persists OAuth tokens and DPoP key
- PEM encoding for private key
- Expiration checking
- Secure file permissions

**Token Validator** (`pkg/auth/atproto/validator.go`)
- `ValidateToken()` - Validate token via PDS getSession
- `ValidateTokenWithResolver()` - Auto-resolve PDS from handle
- Returns validated DID and handle
- Used by registry to verify OAuth tokens

## Testing

### Manual Testing

1. Configure the helper:
   ```bash
   ./docker-credential-atcr configure
   # Enter handle: alice.bsky.social
   # Browser opens for authorization
   # Token saved to ~/.atcr/oauth-token.json
   ```

2. Test credential retrieval:
   ```bash
   echo '{"ServerURL": "atcr.io"}' | ./docker-credential-atcr get
   # Should return: {"Username":"oauth2","Secret":"<jwt>"}
   ```

3. Test with Docker:
   ```bash
   docker push atcr.io/alice/test:latest
   ```

### Integration Testing

TODO: Add automated tests for:
- OAuth flow with mock PDS
- DPoP proof generation
- Token exchange
- Credential helper protocol

## Security Features

### OAuth Token Validation

The registry validates ATProto OAuth tokens by calling `com.atproto.server.getSession` on the user's PDS. This ensures:
- Token is valid and not expired
- Token belongs to the claimed user
- User's DID and handle are extracted from the PDS response
- No trust in client-provided identity information

**Flow:**
1. Client sends OAuth token + handle to `/auth/exchange`
2. Registry resolves handle → PDS endpoint
3. Registry calls `{pds}/xrpc/com.atproto.server.getSession` with token
4. PDS validates token and returns session info (DID, handle)
5. Registry uses validated DID/handle to issue registry JWT
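Steps 3-4 amount to one authenticated XRPC GET; a minimal sketch (the helper name is illustrative, and DPoP binding is omitted for brevity):

```go
package atproto

import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
)

// validateViaGetSession asks the user's PDS whether the token is live and
// who it belongs to; the registry trusts only the PDS response.
func validateViaGetSession(ctx context.Context, pdsEndpoint, token string) (did, handle string, err error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet,
		pdsEndpoint+"/xrpc/com.atproto.server.getSession", nil)
	if err != nil {
		return "", "", err
	}
	req.Header.Set("Authorization", "Bearer "+token)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", "", fmt.Errorf("getSession rejected token: %s", resp.Status)
	}

	var session struct {
		DID    string `json:"did"`
		Handle string `json:"handle"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&session); err != nil {
		return "", "", err
	}
	return session.DID, session.Handle, nil
}
```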
## Future Improvements

1. **Token refresh in background**
   - Proactively refresh before expiry
   - Reduce latency on Docker commands

2. **Multiple account support**
   - Store tokens for multiple handles
   - Allow selecting which account to use

3. **Revocation support**
   - Implement token revocation
   - Clean up on logout

4. **Better error messages**
   - User-friendly OAuth error handling
   - Guide users through common issues

## Troubleshooting

### "Failed to resolve identity"
- Check internet connection
- Verify handle is correct (e.g., `alice.bsky.social`)
- Ensure PDS is accessible

### "Authorization timed out"
- Complete authorization within 5 minutes
- Check if browser opened correctly
- Try running `configure` again

### "Token expired"
- Credential helper should auto-refresh
- If persistent, run `configure` again
- Check `~/.atcr/oauth-token.json` permissions

### "Failed to exchange token"
- Ensure registry is running
- Check `/auth/exchange` endpoint is accessible
- Verify token hasn't been revoked

## References

- [ATProto OAuth Specification](https://atproto.com/specs/oauth)
- [RFC 9449: DPoP](https://datatracker.ietf.org/doc/html/rfc9449)
- [RFC 9126: PAR](https://datatracker.ietf.org/doc/html/rfc9126)
- [RFC 7636: PKCE](https://datatracker.ietf.org/doc/html/rfc7636)
- [Docker Credential Helpers](https://github.com/docker/docker-credential-helpers)

---

docs/QUOTAS.md (added, +1289 lines)
# ATCR Quota System

This document describes ATCR's storage quota implementation, inspired by Harbor's proven approach to per-project blob tracking with deduplication.

## Table of Contents

- [Overview](#overview)
- [Harbor's Approach (Reference Implementation)](#harbors-approach-reference-implementation)
- [Storage Options](#storage-options)
- [Quota Data Model](#quota-data-model)
- [Push Flow (Detailed)](#push-flow-detailed)
- [Delete Flow](#delete-flow)
- [Garbage Collection](#garbage-collection)
- [Quota Reconciliation](#quota-reconciliation)
- [Configuration](#configuration)
- [Trade-offs & Design Decisions](#trade-offs--design-decisions)
- [Future Enhancements](#future-enhancements)

## Overview

ATCR implements per-user storage quotas to:
1. **Limit storage consumption** on shared hold services
2. **Track actual S3 costs** (what new data was added)
3. **Benefit from deduplication** (users only pay once per layer)
4. **Provide transparency** (show users their storage usage)

**Key principle:** Users pay for layers they've uploaded, but only ONCE per layer, regardless of how many images reference it.

### Example Scenario

```
Alice pushes myapp:v1 (layers A, B, C - each 100MB)
  → Alice's quota: +300MB (all new layers)

Alice pushes myapp:v2 (layers A, B, D)
  → Layers A, B already claimed by Alice
  → Layer D is new (100MB)
  → Alice's quota: +100MB (only D is new)
  → Total: 400MB

Bob pushes his-app:latest (layers A, E)
  → Layer A already exists in S3 (uploaded by Alice)
  → Bob claims it for the first time → +100MB to Bob's quota
  → Layer E is new → +100MB to Bob's quota
  → Bob's quota: 200MB

Physical S3 storage:   500MB (A, B, C, D, E)
Claimed storage:       600MB (Alice: 400MB, Bob: 200MB)
Deduplication savings: 100MB (layer A shared)
```

## Harbor's Approach (Reference Implementation)

Harbor is built on distribution/distribution (same as ATCR) and implements quotas as middleware. Their approach:

### Key Insights from Harbor

1. **"Shared blobs are only computed once per project"**
   - Each project tracks which blobs it has uploaded
   - The same blob used in multiple images counts only once per project
   - Different projects claiming the same blob each pay for it

2. **Quota checked when manifest is pushed**
   - Blobs upload first (presigned URLs, can't intercept)
   - Manifest pushed last → quota check happens here
   - Can reject manifest if quota exceeded (orphaned blobs cleaned by GC)

3. **Middleware-based implementation**
   - distribution/distribution has NO built-in quota support
   - Harbor added it as request preprocessing middleware
   - Uses a database (PostgreSQL) or Redis for quota storage

4. **Per-project ownership model**
   - Blobs are physically deduplicated globally
   - Quota accounting is logical (per-project claims)
   - Total claimed storage can exceed physical storage

### References

- Harbor Quota Documentation: https://goharbor.io/docs/1.10/administration/configure-project-quotas/
- Harbor Source: https://github.com/goharbor/harbor (see `src/controller/quota`)

## Storage Options

The hold service needs to store quota data somewhere. Two options:

### Option 1: S3-Based Storage (Recommended for BYOS)

Store quota metadata alongside blobs in the same S3 bucket:

```
Bucket structure:
/docker/registry/v2/blobs/sha256/ab/abc123.../data   ← actual blobs
/atcr/quota/did:plc:alice.json                       ← quota tracking
/atcr/quota/did:plc:bob.json
```

**Pros:**
- ✅ No separate database needed
- ✅ Single S3 bucket (better UX - no second bucket to configure)
- ✅ Quota data lives with the blobs
- ✅ Hold service stays relatively stateless
- ✅ Works with any S3-compatible service (Storj, Minio, Upcloud, Fly.io)

**Cons:**
- ❌ Slower than a local database (network round-trip)
- ❌ Eventual consistency issues
- ❌ Race conditions on concurrent updates
- ❌ Extra S3 API costs (GET/PUT per upload)

**Performance:**
- Each blob upload: 1 HEAD (blob exists?) + 1 GET (quota) + 1 PUT (update quota)
- Typical latency: 100-200ms total overhead
- For high-throughput registries, consider SQLite
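A minimal sketch of the S3-backed quota manager, assuming distribution's `storagedriver.StorageDriver` interface (`GetContent`/`PutContent`; import path per distribution/distribution v3, adjust to the driver package actually vendored) and the `Quota` struct shown under [Quota Data Model](#quota-data-model); no ETag or locking logic here, see [Race Condition Handling](#race-condition-handling):

```go
package quota

import (
	"context"
	"encoding/json"
	"fmt"

	storagedriver "github.com/distribution/distribution/v3/registry/storage/driver"
)

// Manager persists one quota file per DID in the blob bucket.
type Manager struct {
	driver       storagedriver.StorageDriver
	DefaultLimit int64
}

func quotaPath(did string) string {
	return fmt.Sprintf("/atcr/quota/%s.json", did)
}

// GetQuota costs one S3 GET.
func (m *Manager) GetQuota(ctx context.Context, did string) (*Quota, error) {
	data, err := m.driver.GetContent(ctx, quotaPath(did))
	if err != nil {
		return nil, fmt.Errorf("no quota file for %s: %w", did, err)
	}
	var q Quota
	if err := json.Unmarshal(data, &q); err != nil {
		return nil, err
	}
	return &q, nil
}

// SaveQuota costs one S3 PUT; last writer wins.
func (m *Manager) SaveQuota(ctx context.Context, q *Quota) error {
	data, err := json.Marshal(q)
	if err != nil {
		return err
	}
	return m.driver.PutContent(ctx, quotaPath(q.DID), data)
}
```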
### Option 2: SQLite Database (Recommended for Shared Holds)

Local database in the hold service:

```bash
/var/lib/atcr/hold-quota.db
```

**Pros:**
- ✅ Fast local queries (no network latency)
- ✅ ACID transactions (no race conditions)
- ✅ Efficient for high-throughput registries
- ✅ Can use foreign keys and joins

**Cons:**
- ❌ Makes the hold service stateful (persistent volume needed)
- ❌ Not ideal for ephemeral BYOS deployments
- ❌ Backup/restore complexity
- ❌ Multi-instance scaling requires a shared database

**Schema:**
```sql
CREATE TABLE user_quotas (
    did TEXT PRIMARY KEY,
    quota_limit INTEGER NOT NULL DEFAULT 10737418240, -- 10GB
    quota_used INTEGER NOT NULL DEFAULT 0,
    updated_at TIMESTAMP
);

CREATE TABLE claimed_layers (
    did TEXT NOT NULL,
    digest TEXT NOT NULL,
    size INTEGER NOT NULL,
    claimed_at TIMESTAMP,
    PRIMARY KEY(did, digest)
);
```

### Recommendation

- **BYOS (user-owned holds):** S3-based (keeps the hold service ephemeral)
- **Shared holds (multi-user):** SQLite (better performance and consistency)
- **High-traffic production:** SQLite or PostgreSQL (Harbor uses this)

## Quota Data Model

### Quota File Format (S3-based)

```json
{
  "did": "did:plc:alice123",
  "limit": 10737418240,
  "used": 5368709120,
  "claimed_layers": {
    "sha256:abc123...": 104857600,
    "sha256:def456...": 52428800,
    "sha256:789ghi...": 209715200
  },
  "last_updated": "2025-10-09T12:34:56Z",
  "version": 1
}
```

**Fields:**
- `did`: User's ATProto DID
- `limit`: Maximum storage in bytes (default: 10GB)
- `used`: Current storage usage in bytes (sum of claimed_layers)
- `claimed_layers`: Map of digest → size for every layer the user has uploaded
- `last_updated`: Timestamp of the last quota update
- `version`: Schema version for future migrations
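For reference, the Go type this document's pseudocode assumes for that file; the field names mirror the JSON above:

```go
package quota

import "time"

// Quota is the in-memory form of one user's quota file.
type Quota struct {
	DID           string           `json:"did"`
	Limit         int64            `json:"limit"`
	Used          int64            `json:"used"`
	ClaimedLayers map[string]int64 `json:"claimed_layers"` // digest -> size in bytes
	LastUpdated   time.Time        `json:"last_updated"`
	Version       int              `json:"version"`
}
```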
### Why Track Individual Layers?

**Q: Can't we just track a counter?**

**A: We need layer tracking for:**

1. **Deduplication detection**
   - Check if the user already claimed a layer → free upload
   - Example: Updating an image reuses most layers

2. **Accurate deletes**
   - When a manifest is deleted, only decrement unclaimed layers
   - A user may have 5 images sharing layer A - deleting 1 image doesn't free layer A

3. **Quota reconciliation**
   - Verify quota matches reality by listing the user's manifests
   - Recalculate from layers in manifests vs the claimed_layers map

4. **Auditing**
   - "Show me what I'm storing"
   - Users can see which layers consume their quota

## Push Flow (Detailed)

### Step-by-Step: User Pushes Image

```
┌──────────┐                ┌──────────┐                 ┌──────────┐
│  Client  │                │   Hold   │                 │    S3    │
│ (Docker) │                │  Service │                 │  Bucket  │
└──────────┘                └──────────┘                 └──────────┘
      │                          │                            │
      │ 1. PUT /v2/.../blobs/    │                            │
      │    upload?digest=sha256:abc                           │
      ├─────────────────────────>│                            │
      │                          │                            │
      │                          │ 2. Check if blob exists    │
      │                          │    (Stat/HEAD request)     │
      │                          ├───────────────────────────>│
      │                          │<───────────────────────────┤
      │                          │    200 OK (exists) or      │
      │                          │    404 Not Found           │
      │                          │                            │
      │                          │ 3. Read user quota         │
      │                          │    GET /atcr/quota/{did}   │
      │                          ├───────────────────────────>│
      │                          │<───────────────────────────┤
      │                          │    quota.json              │
      │                          │                            │
      │                          │ 4. Calculate quota impact  │
      │                          │    - If digest in          │
      │                          │      claimed_layers: 0     │
      │                          │    - Else: size            │
      │                          │                            │
      │                          │ 5. Check quota limit       │
      │                          │    used + impact <= limit? │
      │                          │                            │
      │                          │ 6. Update quota            │
      │                          │    PUT /atcr/quota/{did}   │
      │                          ├───────────────────────────>│
      │                          │<───────────────────────────┤
      │                          │    200 OK                  │
      │                          │                            │
      │ 7. Presigned URL         │                            │
      │<─────────────────────────┤                            │
      │    {url: "https://s3..."}│                            │
      │                          │                            │
      │ 8. Upload blob to S3     │                            │
      ├──────────────────────────┼───────────────────────────>│
      │                          │                            │
      │ 9. 200 OK                │                            │
      │<─────────────────────────┼────────────────────────────┤
      │                          │                            │
```

### Implementation (Pseudocode)

```go
// cmd/hold/main.go - HandlePutPresignedURL

func (s *HoldService) HandlePutPresignedURL(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	var req PutPresignedURLRequest
	json.NewDecoder(r.Body).Decode(&req)

	// Step 1: Check if blob already exists in S3
	// req.Digest has the form "sha256:<hex>"
	algo, hexPart, _ := strings.Cut(req.Digest, ":")
	blobPath := fmt.Sprintf("/docker/registry/v2/blobs/%s/%s/%s/data",
		algo, hexPart[:2], hexPart)

	_, err := s.driver.Stat(ctx, blobPath)
	blobExists := (err == nil)

	// Step 2: Read quota from S3 (or SQLite)
	quota, err := s.quotaManager.GetQuota(req.DID)
	if err != nil {
		// First upload - create quota with defaults
		quota = &Quota{
			DID:           req.DID,
			Limit:         s.config.QuotaDefaultLimit,
			Used:          0,
			ClaimedLayers: make(map[string]int64),
		}
	}

	// Step 3: Calculate quota impact
	quotaImpact := req.Size // Default: assume new layer

	if _, alreadyClaimed := quota.ClaimedLayers[req.Digest]; alreadyClaimed {
		// User already uploaded this layer before
		quotaImpact = 0
		log.Printf("Layer %s already claimed by %s, no quota impact",
			req.Digest, req.DID)
	} else if blobExists {
		// Blob exists in S3 (uploaded by another user),
		// but this user is claiming it for the first time.
		// It still counts against their quota.
		log.Printf("Layer %s exists globally but new to %s, quota impact: %d",
			req.Digest, req.DID, quotaImpact)
	} else {
		// Brand new blob - will be uploaded to S3
		log.Printf("New layer %s for %s, quota impact: %d",
			req.Digest, req.DID, quotaImpact)
	}

	// Step 4: Check quota limit
	if quota.Used+quotaImpact > quota.Limit {
		http.Error(w, fmt.Sprintf(
			"quota exceeded: used=%d, impact=%d, limit=%d",
			quota.Used, quotaImpact, quota.Limit,
		), http.StatusPaymentRequired) // 402
		return
	}

	// Step 5: Update quota (optimistic - before upload completes)
	quota.Used += quotaImpact
	if quotaImpact > 0 {
		quota.ClaimedLayers[req.Digest] = req.Size
	}
	quota.LastUpdated = time.Now()

	if err := s.quotaManager.SaveQuota(quota); err != nil {
		http.Error(w, "failed to update quota", http.StatusInternalServerError)
		return
	}

	// Step 6: Generate presigned URL
	presignedURL, err := s.getUploadURL(ctx, req.Digest, req.Size, req.DID)
	if err != nil {
		// Roll back the quota update on error
		// (only if this request actually claimed the layer)
		if quotaImpact > 0 {
			quota.Used -= quotaImpact
			delete(quota.ClaimedLayers, req.Digest)
			s.quotaManager.SaveQuota(quota)
		}

		http.Error(w, "failed to generate presigned URL", http.StatusInternalServerError)
		return
	}

	// Step 7: Return presigned URL + quota info
	resp := PutPresignedURLResponse{
		URL:       presignedURL,
		ExpiresAt: time.Now().Add(15 * time.Minute),
		QuotaInfo: QuotaInfo{
			Used:           quota.Used,
			Limit:          quota.Limit,
			Available:      quota.Limit - quota.Used,
			Impact:         quotaImpact,
			AlreadyClaimed: quotaImpact == 0,
		},
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(resp)
}
```

### Race Condition Handling

**Problem:** Two concurrent uploads of the same blob

```
Time    User A                     User B
0ms     Upload layer X (100MB)
10ms                               Upload layer X (100MB)
20ms    Check exists: NO           Check exists: NO
30ms    Quota impact: 100MB        Quota impact: 100MB
40ms    Update quota A: +100MB     Update quota B: +100MB
50ms    Generate presigned URL     Generate presigned URL
100ms   Upload to S3 completes     Upload to S3 (overwrites A's)
```

**Result:** Both users are charged 100MB, but only 100MB is stored in S3. (The cross-user double charge is the claim model working as intended; the mitigations below target lost updates when two uploads race on the same user's quota file.)

**Mitigation strategies:**

1. **Accept eventual consistency** (recommended for S3-based)
   - Run periodic reconciliation to fix discrepancies
   - A small inconsistency window (minutes) is acceptable
   - Reconciliation uses the PDS as the source of truth

2. **Optimistic locking** (S3 ETags)
   ```go
   // Use S3 ETags for conditional writes
   oldETag := getQuotaFileETag(did)
   err := putQuotaFileWithCondition(quota, oldETag)
   if err == PreconditionFailed {
       // Retry with a fresh read
   }
   ```

3. **Database transactions** (SQLite-based)
   ```sql
   -- SQLite has no SELECT ... FOR UPDATE; BEGIN IMMEDIATE takes the write lock up front
   BEGIN IMMEDIATE;
   SELECT quota_used, quota_limit FROM user_quotas WHERE did = ?;
   UPDATE user_quotas SET quota_used = quota_used + ? WHERE did = ?;
   COMMIT;
   ```

## Delete Flow

### Manifest Deletion via AppView UI

When a user deletes a manifest through the AppView web interface:

```
┌──────────┐     ┌──────────┐     ┌──────────┐     ┌──────────┐
│   User   │     │ AppView  │     │   Hold   │     │   PDS    │
│    UI    │     │ Database │     │  Service │     │          │
└──────────┘     └──────────┘     └──────────┘     └──────────┘
      │                │                │                │
      │ DELETE manifest│                │                │
      ├───────────────>│                │                │
      │                │                │                │
      │                │ 1. Get manifest and layers      │
      │                │                │                │
      │                │ 2. Check which layers are still │
      │                │    referenced by the user's     │
      │                │    other manifests              │
      │                │                │                │
      │                │ 3. DELETE manifest from PDS     │
      │                ├────────────────┼───────────────>│
      │                │                │                │
      │                │ 4. POST /quota/decrement        │
      │                │    {layers: [...]}              │
      │                ├───────────────>│                │
      │                │                │                │
      │                │                │ 5. Update quota│
      │                │                │    Remove      │
      │                │                │    unclaimed   │
      │                │                │    layers      │
      │                │                │                │
      │                │ 6. 200 OK      │                │
      │                │<───────────────┤                │
      │                │                │                │
      │                │ 7. Delete from DB               │
      │                │                │                │
      │ 8. Success     │                │                │
      │<───────────────┤                │                │
      │                │                │                │
```

### AppView Implementation

```go
// pkg/appview/handlers/manifest.go

func (h *ManifestHandler) DeleteManifest(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	did := ctx.Value("auth.did").(string)
	repository := chi.URLParam(r, "repository")
	digest := chi.URLParam(r, "digest")

	// Step 1: Get manifest and its layers from database
	manifest, err := db.GetManifest(h.db, digest)
	if err != nil {
		http.Error(w, "manifest not found", 404)
		return
	}

	layers, err := db.GetLayersForManifest(h.db, manifest.ID)
	if err != nil {
		http.Error(w, "failed to get layers", 500)
		return
	}

	// Step 2: For each layer, check if the user still references it
	// in other manifests
	layersToDecrement := []LayerInfo{}

	for _, layer := range layers {
		// Query: does this user have other manifests using this layer?
		stillReferenced, err := db.CheckLayerReferencedByUser(
			h.db, did, layer.Digest, manifest.ID,
		)

		if err != nil {
			http.Error(w, "failed to check layer references", 500)
			return
		}

		if !stillReferenced {
			// This layer is no longer used by the user
			layersToDecrement = append(layersToDecrement, LayerInfo{
				Digest: layer.Digest,
				Size:   layer.Size,
			})
		}
	}

	// Step 3: Delete manifest from user's PDS
	// (accessToken and manifestRKey come from the session and stored record)
	atprotoClient := atproto.NewClient(manifest.PDSEndpoint, did, accessToken)
	err = atprotoClient.DeleteRecord(ctx, atproto.ManifestCollection, manifestRKey)
	if err != nil {
		http.Error(w, "failed to delete from PDS", 500)
		return
	}

	// Step 4: Notify hold service to decrement quota
	if len(layersToDecrement) > 0 {
		holdClient := &http.Client{}

		decrementReq := QuotaDecrementRequest{
			DID:    did,
			Layers: layersToDecrement,
		}

		body, _ := json.Marshal(decrementReq)
		resp, err := holdClient.Post(
			manifest.HoldEndpoint+"/quota/decrement",
			"application/json",
			bytes.NewReader(body),
		)

		if err != nil || resp.StatusCode != 200 {
			log.Printf("Warning: failed to update quota on hold service: %v", err)
			// Continue anyway - GC reconciliation will fix it
		}
	}

	// Step 5: Delete from AppView database
	err = db.DeleteManifest(h.db, did, repository, digest)
	if err != nil {
		http.Error(w, "failed to delete from database", 500)
		return
	}

	w.WriteHeader(http.StatusNoContent)
}
```

### Hold Service Decrement Endpoint

```go
// cmd/hold/main.go

type QuotaDecrementRequest struct {
	DID    string      `json:"did"`
	Layers []LayerInfo `json:"layers"`
}

type LayerInfo struct {
	Digest string `json:"digest"`
	Size   int64  `json:"size"`
}

func (s *HoldService) HandleQuotaDecrement(w http.ResponseWriter, r *http.Request) {
	var req QuotaDecrementRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "invalid request", 400)
		return
	}

	// Read current quota
	quota, err := s.quotaManager.GetQuota(req.DID)
	if err != nil {
		http.Error(w, "quota not found", 404)
		return
	}

	// Decrement quota for each layer
	for _, layer := range req.Layers {
		if size, claimed := quota.ClaimedLayers[layer.Digest]; claimed {
			// Remove from claimed layers
			delete(quota.ClaimedLayers, layer.Digest)
			quota.Used -= size

			log.Printf("Decremented quota for %s: layer %s (%d bytes)",
				req.DID, layer.Digest, size)
		} else {
			log.Printf("Warning: layer %s not in claimed_layers for %s",
				layer.Digest, req.DID)
		}
	}

	// Ensure quota.Used doesn't go negative (defensive)
	if quota.Used < 0 {
		log.Printf("Warning: quota.Used went negative for %s, resetting to 0", req.DID)
		quota.Used = 0
	}

	// Save updated quota
	quota.LastUpdated = time.Now()
	if err := s.quotaManager.SaveQuota(quota); err != nil {
		http.Error(w, "failed to save quota", 500)
		return
	}

	// Return updated quota info
	json.NewEncoder(w).Encode(map[string]any{
		"used":  quota.Used,
		"limit": quota.Limit,
	})
}
```

### SQL Query: Check Layer References

```sql
-- pkg/appview/db/queries.go

-- Check if the user still references this layer in other manifests
SELECT COUNT(*)
FROM layers l
JOIN manifests m ON l.manifest_id = m.id
WHERE m.did = ?        -- User's DID
  AND l.digest = ?     -- Layer digest
  AND m.id != ?        -- Exclude the manifest being deleted
```
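A minimal `database/sql` wrapper around this query, matching the `db.CheckLayerReferencedByUser` call in the delete handler above (the helper and its signature are illustrative):

```go
package db

import "database/sql"

// CheckLayerReferencedByUser reports whether any of the user's other
// manifests still reference the given layer digest.
func CheckLayerReferencedByUser(dbc *sql.DB, did, digest string, excludeManifestID int64) (bool, error) {
	const q = `
		SELECT COUNT(*)
		FROM layers l
		JOIN manifests m ON l.manifest_id = m.id
		WHERE m.did = ?
		  AND l.digest = ?
		  AND m.id != ?`

	var n int
	if err := dbc.QueryRow(q, did, digest, excludeManifestID).Scan(&n); err != nil {
		return false, err
	}
	return n > 0, nil
}
```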
723 +
724 + ### AppView Internal API
725 +
726 + ```go
727 + // pkg/appview/handlers/internal.go
728 +
729 + // Get all referenced blobs for a specific hold
730 + func (h *InternalHandler) GetReferencedBlobs(w http.ResponseWriter, r *http.Request) {
731 + holdEndpoint := r.URL.Query().Get("hold")
732 + if holdEndpoint == "" {
733 + http.Error(w, "missing hold parameter", 400)
734 + return
735 + }
736 +
737 + // Query database for all layers in manifests stored in this hold
738 + query := `
739 + SELECT DISTINCT l.digest
740 + FROM layers l
741 + JOIN manifests m ON l.manifest_id = m.id
742 + WHERE m.hold_endpoint = ?
743 + `
744 +
745 + rows, err := h.db.Query(query, holdEndpoint)
746 + if err != nil {
747 + http.Error(w, "database error", 500)
748 + return
749 + }
750 + defer rows.Close()
751 +
752 + blobs := []string{}
753 + for rows.Next() {
754 + var digest string
755 + if err := rows.Scan(&digest); err != nil {
756 + http.Error(w, "database error", 500)
757 + return // a truncated list would let GC delete live blobs
758 + }
759 + blobs = append(blobs, digest)
760 + }
761 + json.NewEncoder(w).Encode(map[string]any{
762 + "blobs": blobs,
763 + "count": len(blobs),
764 + "hold": holdEndpoint,
765 + })
766 + }
767 + ```
768 +
769 + ### GC Cron Schedule
770 +
771 + ```go
772 + // cmd/hold/main.go
773 +
774 + func main() {
775 + // ... service setup ...
776 +
777 + // Start GC cron if enabled
778 + if os.Getenv("GC_ENABLED") == "true" {
779 + gcInterval := 24 * time.Hour // Daily by default
780 +
781 + go func() {
782 + ticker := time.NewTicker(gcInterval)
783 + defer ticker.Stop()
784 +
785 + for range ticker.C {
786 + if err := garbageCollector.Run(context.Background()); err != nil {
787 + log.Printf("GC error: %v", err)
788 + }
789 + }
790 + }()
791 +
792 + log.Printf("GC cron started: runs every %v", gcInterval)
793 + }
794 +
795 + // Start server...
796 + }
797 + ```
798 +
799 + ## Quota Reconciliation
800 +
801 + ### PDS as Source of Truth
802 +
803 + **Key insight:** Manifest records in PDS are publicly readable (no OAuth needed for reads).
804 +
805 + Each manifest contains:
806 + - Repository name
807 + - Digest
808 + - Layers array with digest + size
809 + - Hold endpoint
810 +
811 + The hold service can query the PDS to calculate the user's true quota:
812 +
813 + ```
814 + 1. List all io.atcr.manifest records for user
815 + 2. Filter manifests where holdEndpoint == this hold service
816 + 3. Extract unique layers (deduplicate by digest)
817 + 4. Sum layer sizes = true quota usage
818 + 5. Compare to quota file
819 + 6. 
Fix discrepancies 820 + ``` 821 + 822 + ### Implementation 823 + 824 + ```go 825 + // cmd/hold/quota/reconcile.go 826 + 827 + type Reconciler struct { 828 + quotaManager *Manager 829 + atprotoResolver *atproto.Resolver 830 + holdURL string 831 + } 832 + 833 + // ReconcileUser recalculates quota from PDS manifests 834 + func (r *Reconciler) ReconcileUser(ctx context.Context, did string) error { 835 + log.Printf("Reconciling quota for %s", did) 836 + 837 + // Step 1: Resolve user's PDS endpoint 838 + identity, err := r.atprotoResolver.ResolveIdentity(ctx, did) 839 + if err != nil { 840 + return fmt.Errorf("failed to resolve DID: %w", err) 841 + } 842 + 843 + // Step 2: Create unauthenticated ATProto client 844 + // (manifest records are public - no OAuth needed) 845 + client := atproto.NewClient(identity.PDSEndpoint, did, "") 846 + 847 + // Step 3: List all manifest records for this user 848 + manifests, err := client.ListRecords(ctx, atproto.ManifestCollection, 1000) 849 + if err != nil { 850 + return fmt.Errorf("failed to list manifests: %w", err) 851 + } 852 + 853 + // Step 4: Filter manifests stored in THIS hold service 854 + // and extract unique layers 855 + uniqueLayers := make(map[string]int64) // digest -> size 856 + 857 + for _, record := range manifests { 858 + var manifest atproto.ManifestRecord 859 + if err := json.Unmarshal(record.Value, &manifest); err != nil { 860 + log.Printf("Warning: failed to parse manifest: %v", err) 861 + continue 862 + } 863 + 864 + // Only count manifests stored in this hold 865 + if manifest.HoldEndpoint != r.holdURL { 866 + continue 867 + } 868 + 869 + // Add config blob 870 + if manifest.Config.Digest != "" { 871 + uniqueLayers[manifest.Config.Digest] = manifest.Config.Size 872 + } 873 + 874 + // Add layer blobs 875 + for _, layer := range manifest.Layers { 876 + uniqueLayers[layer.Digest] = layer.Size 877 + } 878 + } 879 + 880 + // Step 5: Calculate true quota usage 881 + trueUsage := int64(0) 882 + for _, size := range uniqueLayers { 883 + trueUsage += size 884 + } 885 + 886 + log.Printf("User %s true usage from PDS: %d bytes (%d unique layers)", 887 + did, trueUsage, len(uniqueLayers)) 888 + 889 + // Step 6: Compare with current quota file 890 + quota, err := r.quotaManager.GetQuota(did) 891 + if err != nil { 892 + log.Printf("No existing quota for %s, creating new", did) 893 + quota = &Quota{ 894 + DID: did, 895 + Limit: r.quotaManager.DefaultLimit, 896 + ClaimedLayers: make(map[string]int64), 897 + } 898 + } 899 + 900 + // Step 7: Fix discrepancies 901 + if quota.Used != trueUsage || len(quota.ClaimedLayers) != len(uniqueLayers) { 902 + log.Printf("Quota mismatch for %s: recorded=%d, actual=%d (diff=%d)", 903 + did, quota.Used, trueUsage, trueUsage - quota.Used) 904 + 905 + // Update quota to match PDS truth 906 + quota.Used = trueUsage 907 + quota.ClaimedLayers = uniqueLayers 908 + quota.LastUpdated = time.Now() 909 + 910 + if err := r.quotaManager.SaveQuota(quota); err != nil { 911 + return fmt.Errorf("failed to save reconciled quota: %w", err) 912 + } 913 + 914 + log.Printf("Reconciled quota for %s: %d bytes", did, trueUsage) 915 + } else { 916 + log.Printf("Quota for %s is accurate", did) 917 + } 918 + 919 + return nil 920 + } 921 + 922 + // ReconcileAll reconciles all users (run periodically) 923 + func (r *Reconciler) ReconcileAll(ctx context.Context) error { 924 + // Get list of all users with quota files 925 + users, err := r.quotaManager.ListUsers() 926 + if err != nil { 927 + return err 928 + } 929 + 930 + log.Printf("Starting 
reconciliation for %d users", len(users)) 931 + 932 + for _, did := range users { 933 + if err := r.ReconcileUser(ctx, did); err != nil { 934 + log.Printf("Failed to reconcile %s: %v", did, err) 935 + // Continue with other users 936 + } 937 + } 938 + 939 + log.Println("Reconciliation complete") 940 + return nil 941 + } 942 + ``` 943 + 944 + ### Reconciliation Cron 945 + 946 + ```go 947 + // cmd/hold/main.go 948 + 949 + func main() { 950 + // ... setup ... 951 + 952 + // Start reconciliation cron 953 + if os.Getenv("QUOTA_RECONCILE_ENABLED") == "true" { 954 + reconcileInterval := 24 * time.Hour // Daily 955 + 956 + go func() { 957 + ticker := time.NewTicker(reconcileInterval) 958 + defer ticker.Stop() 959 + 960 + for range ticker.C { 961 + if err := reconciler.ReconcileAll(context.Background()); err != nil { 962 + log.Printf("Reconciliation error: %v", err) 963 + } 964 + } 965 + }() 966 + 967 + log.Printf("Quota reconciliation cron started: runs every %v", reconcileInterval) 968 + } 969 + 970 + // ... start server ... 971 + } 972 + ``` 973 + 974 + ### Why PDS as Source of Truth Works 975 + 976 + 1. **Manifests are canonical** - If manifest exists in PDS, user owns those layers 977 + 2. **Public reads** - No OAuth needed, just resolve DID → PDS endpoint 978 + 3. **ATProto durability** - PDS is user's authoritative data store 979 + 4. **AppView is cache** - AppView database might lag or have inconsistencies 980 + 5. **Reconciliation fixes drift** - Periodic sync from PDS ensures accuracy 981 + 982 + **Example reconciliation scenarios:** 983 + 984 + - **Orphaned quota entries:** User deleted manifest from PDS, but hold quota still has it 985 + → Reconciliation removes from claimed_layers 986 + 987 + - **Missing quota entries:** User pushed manifest, but quota update failed 988 + → Reconciliation adds to claimed_layers 989 + 990 + - **Race condition duplicates:** Two concurrent pushes double-counted a layer 991 + → Reconciliation fixes to actual usage 992 + 993 + ## Configuration 994 + 995 + ### Hold Service Environment Variables 996 + 997 + ```bash 998 + # .env.hold 999 + 1000 + # ============================================================================ 1001 + # Quota Configuration 1002 + # ============================================================================ 1003 + 1004 + # Enable quota enforcement 1005 + QUOTA_ENABLED=true 1006 + 1007 + # Default quota limit per user (bytes) 1008 + # 10GB = 10737418240 1009 + # 50GB = 53687091200 1010 + # 100GB = 107374182400 1011 + QUOTA_DEFAULT_LIMIT=10737418240 1012 + 1013 + # Storage backend for quota data 1014 + # Options: s3, sqlite 1015 + QUOTA_STORAGE_BACKEND=s3 1016 + 1017 + # For S3-based storage: 1018 + # Quota files stored in same bucket as blobs 1019 + QUOTA_STORAGE_PREFIX=/atcr/quota/ 1020 + 1021 + # For SQLite-based storage: 1022 + QUOTA_DB_PATH=/var/lib/atcr/hold-quota.db 1023 + 1024 + # ============================================================================ 1025 + # Garbage Collection 1026 + # ============================================================================ 1027 + 1028 + # Enable periodic garbage collection 1029 + GC_ENABLED=true 1030 + 1031 + # GC interval (default: 24h) 1032 + GC_INTERVAL=24h 1033 + 1034 + # AppView URL for GC reference checking 1035 + APPVIEW_URL=https://atcr.io 1036 + 1037 + # ============================================================================ 1038 + # Quota Reconciliation 1039 + # ============================================================================ 1040 + 1041 + # Enable 
quota reconciliation from PDS 1042 + QUOTA_RECONCILE_ENABLED=true 1043 + 1044 + # Reconciliation interval (default: 24h) 1045 + QUOTA_RECONCILE_INTERVAL=24h 1046 + 1047 + # ============================================================================ 1048 + # Hold Service Identity (Required) 1049 + # ============================================================================ 1050 + 1051 + # Public URL of this hold service 1052 + HOLD_PUBLIC_URL=https://hold1.example.com 1053 + 1054 + # Owner DID (for auto-registration) 1055 + HOLD_OWNER=did:plc:xyz123 1056 + ``` 1057 + 1058 + ### AppView Configuration 1059 + 1060 + ```bash 1061 + # .env.appview 1062 + 1063 + # Internal API endpoint for hold services 1064 + # Used for GC reference checking 1065 + ATCR_INTERNAL_API_ENABLED=true 1066 + 1067 + # Optional: authentication token for internal APIs 1068 + ATCR_INTERNAL_API_TOKEN=secret123 1069 + ``` 1070 + 1071 + ## Trade-offs & Design Decisions 1072 + 1073 + ### 1. Claimed Storage vs Physical Storage 1074 + 1075 + **Decision:** Track claimed storage (logical accounting) 1076 + 1077 + **Why:** 1078 + - Predictable for users: "you pay for what you upload" 1079 + - No complex cross-user dependencies 1080 + - Delete always gives you quota back 1081 + - Matches Harbor's proven model 1082 + 1083 + **Trade-off:** 1084 + - Total claimed can exceed physical storage 1085 + - Users might complain "I uploaded 10GB but S3 only has 6GB" 1086 + 1087 + **Mitigation:** 1088 + - Show deduplication savings metric 1089 + - Educate users: "You claimed 10GB, but deduplication saved 4GB" 1090 + 1091 + ### 2. S3 vs SQLite for Quota Storage 1092 + 1093 + **Decision:** Support both, recommend based on use case 1094 + 1095 + **S3 Pros:** 1096 + - No database to manage 1097 + - Quota data lives with blobs 1098 + - Better for ephemeral BYOS 1099 + 1100 + **SQLite Pros:** 1101 + - Faster (no network) 1102 + - ACID transactions (no race conditions) 1103 + - Better for high-traffic shared holds 1104 + 1105 + **Trade-off:** 1106 + - S3: eventual consistency, race conditions 1107 + - SQLite: stateful service, scaling challenges 1108 + 1109 + **Mitigation:** 1110 + - Reconciliation fixes S3 inconsistencies 1111 + - SQLite can use shared DB for multi-instance 1112 + 1113 + ### 3. Optimistic Quota Update 1114 + 1115 + **Decision:** Update quota BEFORE upload completes 1116 + 1117 + **Why:** 1118 + - Prevent race conditions (two users uploading simultaneously) 1119 + - Can reject before presigned URL generated 1120 + - Simpler flow 1121 + 1122 + **Trade-off:** 1123 + - If upload fails, quota already incremented (user "paid" for nothing) 1124 + 1125 + **Mitigation:** 1126 + - Reconciliation from PDS fixes orphaned quota entries 1127 + - Acceptable for MVP (upload failures are rare) 1128 + 1129 + ### 4. AppView as Intermediary 1130 + 1131 + **Decision:** AppView notifies hold service on deletes 1132 + 1133 + **Why:** 1134 + - AppView already has manifest/layer database 1135 + - Can efficiently check if layer still referenced 1136 + - Hold service doesn't need to query PDS on every delete 1137 + 1138 + **Trade-off:** 1139 + - AppView → Hold dependency 1140 + - Network hop on delete 1141 + 1142 + **Mitigation:** 1143 + - If notification fails, reconciliation fixes quota 1144 + - Eventually consistent is acceptable 1145 + 1146 + ### 5. 
PDS as Source of Truth 1147 + 1148 + **Decision:** Use PDS manifests for reconciliation 1149 + 1150 + **Why:** 1151 + - Manifests in PDS are canonical user data 1152 + - Public reads (no OAuth for reconciliation) 1153 + - AppView database might lag or be inconsistent 1154 + 1155 + **Trade-off:** 1156 + - Reconciliation requires PDS queries (slower) 1157 + - Limited to 1000 manifests per query 1158 + 1159 + **Mitigation:** 1160 + - Run reconciliation daily (not real-time) 1161 + - Paginate if user has >1000 manifests 1162 + 1163 + ## Future Enhancements 1164 + 1165 + ### 1. Quota API Endpoints 1166 + 1167 + ``` 1168 + GET /quota/usage - Get current user's quota 1169 + GET /quota/breakdown - Get storage by repository 1170 + POST /quota/limit - Update user's quota limit (admin) 1171 + GET /quota/stats - Get hold-wide statistics 1172 + ``` 1173 + 1174 + ### 2. Quota Alerts 1175 + 1176 + Notify users when approaching limit: 1177 + - Email/webhook at 80%, 90%, 95% 1178 + - Reject uploads at 100% (currently implemented) 1179 + - Grace period: allow 105% temporarily 1180 + 1181 + ### 3. Tiered Quotas 1182 + 1183 + Different limits based on user tier: 1184 + - Free: 10GB 1185 + - Pro: 100GB 1186 + - Enterprise: unlimited 1187 + 1188 + ### 4. Quota Purchasing 1189 + 1190 + Allow users to buy additional storage: 1191 + - Stripe integration 1192 + - $0.10/GB/month pricing 1193 + - Dynamic limit updates 1194 + 1195 + ### 5. Cross-Hold Deduplication 1196 + 1197 + If multiple holds share same S3 bucket: 1198 + - Track blob ownership globally 1199 + - Split costs proportionally 1200 + - More complex, but maximizes deduplication 1201 + 1202 + ### 6. Manifest-Based Quota (Alternative Model) 1203 + 1204 + Instead of tracking layers, track manifests: 1205 + - Simpler: just count manifest sizes 1206 + - No deduplication benefits for users 1207 + - Might be acceptable for some use cases 1208 + 1209 + ### 7. Redis-Based Quota (High Performance) 1210 + 1211 + For high-traffic registries: 1212 + - Use Redis instead of S3/SQLite 1213 + - Sub-millisecond quota checks 1214 + - Harbor-proven approach 1215 + 1216 + ### 8. Quota Visualizations 1217 + 1218 + Web UI showing: 1219 + - Storage usage over time 1220 + - Top consumers by repository 1221 + - Deduplication savings graph 1222 + - Layer size distribution 1223 + 1224 + ## Appendix: SQL Queries 1225 + 1226 + ### Check if User Still References Layer 1227 + 1228 + ```sql 1229 + -- After deleting manifest, check if user has other manifests using this layer 1230 + SELECT COUNT(*) 1231 + FROM layers l 1232 + JOIN manifests m ON l.manifest_id = m.id 1233 + WHERE m.did = ? -- User's DID 1234 + AND l.digest = ? -- Layer digest to check 1235 + AND m.id != ? -- Exclude the manifest being deleted 1236 + ``` 1237 + 1238 + ### Get All Unique Layers for User 1239 + 1240 + ```sql 1241 + -- Calculate true quota usage for a user 1242 + SELECT DISTINCT l.digest, l.size 1243 + FROM layers l 1244 + JOIN manifests m ON l.manifest_id = m.id 1245 + WHERE m.did = ? 1246 + AND m.hold_endpoint = ? 1247 + ``` 1248 + 1249 + ### Get Referenced Blobs for Hold 1250 + 1251 + ```sql 1252 + -- For GC: get all blobs still referenced by any user of this hold 1253 + SELECT DISTINCT l.digest 1254 + FROM layers l 1255 + JOIN manifests m ON l.manifest_id = m.id 1256 + WHERE m.hold_endpoint = ? 
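-- Performance note: this query and the reconciliation query both filter on
-- m.hold_endpoint; an index on manifests(hold_endpoint) is assumed here,
-- though not specified elsewhere in this doc.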
1257 + ``` 1258 + 1259 + ### Get Storage Stats by Repository 1260 + 1261 + ```sql 1262 + -- User's storage broken down by repository 1263 + SELECT 1264 + m.repository, 1265 + COUNT(DISTINCT m.id) as manifest_count, 1266 + COUNT(DISTINCT l.digest) as unique_layers, 1267 + SUM(l.size) as total_size 1268 + FROM manifests m 1269 + JOIN layers l ON l.manifest_id = m.id 1270 + WHERE m.did = ? 1271 + AND m.hold_endpoint = ? 1272 + GROUP BY m.repository 1273 + ORDER BY total_size DESC 1274 + ``` 1275 + 1276 + ## References 1277 + 1278 + - **Harbor Quotas:** https://goharbor.io/docs/1.10/administration/configure-project-quotas/ 1279 + - **Harbor Source:** https://github.com/goharbor/harbor 1280 + - **ATProto Spec:** https://atproto.com/specs/record 1281 + - **OCI Distribution Spec:** https://github.com/opencontainers/distribution-spec 1282 + - **S3 API Reference:** https://docs.aws.amazon.com/AmazonS3/latest/API/ 1283 + - **Distribution GC:** https://github.com/distribution/distribution/blob/main/registry/storage/garbagecollect.go 1284 + 1285 + --- 1286 + 1287 + **Document Version:** 1.0 1288 + **Last Updated:** 2025-10-09 1289 + **Author:** Generated from implementation research and Harbor analysis
-460
docs/SPEC.md
··· 1 - ATProto Container Registry (atcr.io) Implementation Plan 2 - 3 - Project Structure 4 - 5 - /home/data/atcr.io/ 6 - ├── cmd/ 7 - │ └── registry/ 8 - │ └── main.go # Entrypoint that imports distribution 9 - ├── pkg/ 10 - │ ├── atproto/ 11 - │ │ ├── client.go # ATProto client wrapper (using indigo) 12 - │ │ ├── manifest_store.go # Implements distribution.ManifestService 13 - │ │ ├── resolver.go # DID/handle resolution (alice → did:plc:...) 14 - │ │ └── lexicon.go # ATProto record schemas for manifests 15 - │ ├── storage/ 16 - │ │ ├── s3_blob_store.go # Wraps distribution's S3 driver for blobs 17 - │ │ └── routing_repository.go # Routes manifests→ATProto, blobs→S3 18 - │ ├── middleware/ 19 - │ │ ├── repository.go # Repository middleware registration 20 - │ │ └── registry.go # Registry middleware for name resolution 21 - │ └── server/ 22 - │ └── handler.go # HTTP wrapper for custom name resolution 23 - ├── config/ 24 - │ └── config.yml # Registry configuration 25 - ├── go.mod 26 - ├── go.sum 27 - ├── Dockerfile 28 - ├── README.md 29 - └── CLAUDE.md # Updated with architecture docs 30 - 31 - 32 - Implementation Steps 33 - 34 - Phase 1: Project Setup 35 - 36 - 1. Initialize Go module with github.com/distribution/distribution/v3 and github.com/bluesky-social/indigo 37 - 2. Create basic project structure 38 - 3. Set up cmd/appview/main.go that imports distribution and registers middleware 39 - 40 - Phase 2: Core ATProto Integration 41 - 42 - 4. Implement DID/handle resolver (pkg/atproto/resolver.go) 43 - - Resolve handles to DIDs (alice.bsky.social → did:plc:xyz) 44 - - Discover PDS endpoints from DID documents 45 - 5. Create ATProto client wrapper (pkg/atproto/client.go) 46 - - Wrap indigo SDK for manifest storage 47 - - Handle authentication with PDS 48 - 6. Design ATProto lexicon for manifest records (pkg/atproto/lexicon.go) 49 - - Define schema for storing OCI manifests as ATProto records 50 - 51 - Phase 3: Storage Layer 52 - 53 - 7. Implement ATProto manifest store (pkg/atproto/manifest_store.go) 54 - - Implements distribution.ManifestService 55 - - Stores/retrieves manifests from PDS 56 - 8. Implement S3 blob store wrapper (pkg/storage/s3_blob_store.go) 57 - - Wraps distribution's built-in S3 driver 58 - 9. Create routing repository (pkg/storage/routing_repository.go) 59 - - Returns ATProto store for Manifests() 60 - - Returns S3 store for Blobs() 61 - 62 - Phase 4: Middleware Layer 63 - 64 - 10. Implement repository middleware (pkg/middleware/repository.go) 65 - - Registers routing repository 66 - - Configurable via YAML 67 - 11. Implement registry/namespace middleware (pkg/middleware/registry.go) 68 - - Intercepts Repository(name) calls 69 - - Performs name resolution before repository creation 70 - 71 - Phase 5: HTTP Layer (if needed) 72 - 73 - 12. Create custom HTTP handler (pkg/server/handler.go) 74 - - Wraps distribution's HTTP handlers 75 - - Performs early name resolution: atcr.io/alice/myimage → resolve alice 76 - - Delegates to distribution handlers 77 - 78 - Phase 6: Configuration & Deployment 79 - 80 - 13. Create registry configuration (config/config.yml) 81 - 14. Create Dockerfile for building atcr-appview binary 82 - 16. Write README.md with usage instructions 83 - 84 - Phase 7: Documentation 85 - 86 - 17. 
Update CLAUDE.md with: 87 - - Architecture overview (ATProto for manifests, S3 for blobs) 88 - - Build/run/test commands 89 - - How name resolution works 90 - - Middleware registration flow 91 - - Key design decisions 92 - 93 - Key Technical Decisions 94 - 95 - ATProto Storage Design: 96 - - Manifests stored as ATProto records in user's PDS 97 - - Each image manifest is a record in a io.atcr.manifest collection 98 - - Record key = image digest (sha256:...) 99 - 100 - Name Resolution: 101 - - atcr.io/alice/myimage → resolve alice to DID → discover PDS 102 - - Support both handles (alice.bsky.social) and DIDs (did:plc:xyz) 103 - 104 - Blob Storage: 105 - - All layers/blobs in S3 (content-addressable by digest) 106 - - Manifests reference S3-stored blobs by digest 107 - - S3 provides cheap, durable blob storage 108 - 109 - Middleware Strategy: 110 - - Repository middleware for storage routing 111 - - Registry middleware (or HTTP wrapper) for name resolution 112 - - No fork of distribution core needed 113 - 114 - Perfect. To match Docker Hub/ghcr.io/gcr.io, here's what we need: 115 - 116 - ● Implementation Plan (Drop-in replacement for Docker Hub/ghcr.io) 117 - 118 - Flow 1: Standard Token Auth (Like Docker Hub) - PRIMARY 119 - 120 - # User experience 121 - docker login atcr.io -u alice.bsky.social -p <atproto-app-password> 122 - docker push atcr.io/alice/myapp:latest 123 - 124 - # Behind the scenes 125 - 1. docker login stores credentials locally 126 - 2. docker push → Registry returns 401 with WWW-Authenticate: Bearer realm="https://atcr.io/auth/token"... 127 - 3. Docker auto-calls /auth/token with Basic auth (alice.bsky.social:app-password) 128 - 4. Auth service validates against ATProto createSession 129 - 5. Returns JWT token with scope for alice/myapp 130 - 6. Docker uses JWT for manifest/blob uploads 131 - 7. Registry validates JWT signature and scope 132 - 133 - Components: 134 - - /auth/token endpoint (standalone service or embedded) 135 - - ATProto session validator (username/password → validate via PDS) 136 - - JWT issuer/signer 137 - - JWT validator middleware for registry 138 - 139 - Flow 2: Credential Helper (Like gcr.io) - ADVANCED 140 - 141 - # User experience 142 - docker-credential-atcr configure 143 - # Opens browser for ATProto OAuth 144 - docker push atcr.io/alice/myapp:latest 145 - # No manual login needed 146 - 147 - # Behind the scenes 148 - 1. Helper does OAuth flow → gets ATProto access token 149 - 2. Caches token securely 150 - 3. When Docker needs credentials, calls helper via stdin/stdout 151 - 4. Helper exchanges ATProto token for registry JWT at /auth/exchange 152 - 5. Returns JWT to Docker 153 - 6. 
Docker uses JWT for requests 154 - 155 - Components: 156 - - cmd/credential-helper/main.go - Standalone binary 157 - - ATProto OAuth client 158 - - Token exchange endpoint (/auth/exchange) 159 - - Secure token cache 160 - 161 - Architecture: 162 - 163 - pkg/auth/ 164 - ├── token/ 165 - │ ├── service.go # HTTP handler for /auth/token 166 - │ ├── claims.go # JWT claims structure 167 - │ ├── issuer.go # Signs JWTs 168 - │ └── validator.go # Validates JWTs (middleware for registry) 169 - ├── atproto/ 170 - │ ├── session.go # Validates username/password via ATProto 171 - │ └── oauth.go # OAuth flow implementation 172 - ├── exchange/ 173 - │ └── handler.go # /auth/exchange endpoint (OAuth → JWT) 174 - └── scope.go # Parses/validates Docker scopes 175 - 176 - cmd/ 177 - ├── registry/main.go # Registry server (existing) 178 - ├── auth/main.go # Standalone auth service (optional) 179 - └── credential-helper/ 180 - └── main.go # docker-credential-atcr binary 181 - 182 - Config: 183 - 184 - auth: 185 - token: 186 - realm: https://atcr.io/auth/token # Where Docker gets tokens 187 - service: atcr.io 188 - issuer: atcr.io 189 - rootcertbundle: /etc/atcr/token-signing.crt 190 - privatekey: /etc/atcr/token-signing.pem 191 - expiration: 300 192 - 193 - atproto: 194 - # Used by auth service to validate credentials 195 - pds_endpoint: https://bsky.social 196 - client_id: atcr-appview 197 - oauth_redirect: http://localhost:8888/callback 198 - 199 - ATProto OAuth Implementation Plan 200 - 201 - Architecture 202 - 203 - Dependencies: 204 - - authelia.com/client/oauth2 - OAuth + PAR support 205 - - github.com/AxisCommunications/go-dpop - DPoP proof generation (handles JWK automatically) 206 - - github.com/golang-jwt/jwt/v5 - JWT library (transitive via go-dpop) 207 - - Our existing pkg/atproto/resolver.go - ATProto identity resolution 208 - 209 - Implementation Components 210 - 211 - 1. OAuth Client (pkg/auth/oauth/client.go) - ~100 lines 212 - 213 - type Client struct { 214 - config *oauth2.Config 215 - dpopKey *ecdsa.PrivateKey 216 - resolver *atproto.Resolver 217 - clientID string // URL to our metadata document 218 - redirectURI string 219 - dpopNonce string // Server-provided nonce 220 - } 221 - 222 - func NewClient(clientID, redirectURI string) (*Client, error) 223 - func (c *Client) AuthorizeURL(handle string, scopes []string) (string, error) 224 - func (c *Client) Exchange(code string) (*Token, error) 225 - func (c *Client) addDPoPHeader(req *http.Request, method, url string) error 226 - 227 - Flow: 228 - 1. Generate ECDSA P-256 key for DPoP 229 - 2. Discover authorization server from handle/DID 230 - 3. Use authelia's PushedAuth() for PAR with DPoP header 231 - 4. Exchange code for token with DPoP proof 232 - 233 - 2. Authorization Server Discovery (pkg/auth/oauth/discovery.go) - ~30 lines 234 - 235 - type AuthServerMetadata struct { 236 - Issuer string `json:"issuer"` 237 - AuthorizationEndpoint string `json:"authorization_endpoint"` 238 - TokenEndpoint string `json:"token_endpoint"` 239 - PushedAuthorizationRequestEndpoint string `json:"pushed_authorization_request_endpoint"` 240 - DPoPSigningAlgValuesSupported []string `json:"dpop_signing_alg_values_supported"` 241 - } 242 - 243 - func DiscoverAuthServer(pdsEndpoint string) (*AuthServerMetadata, error) 244 - 245 - Implementation: 246 - - GET {pds}/.well-known/oauth-authorization-server 247 - - Parse JSON metadata 248 - - Validate required endpoints exist 249 - 250 - 3. 
Client Metadata Server (pkg/auth/oauth/metadata.go) - ~40 lines 251 - 252 - type ClientMetadata struct { 253 - ClientID string `json:"client_id"` 254 - RedirectURIs []string `json:"redirect_uris"` 255 - GrantTypes []string `json:"grant_types"` 256 - ResponseTypes []string `json:"response_types"` 257 - Scope string `json:"scope"` 258 - DPoPBoundAccessTokens bool `json:"dpop_bound_access_tokens"` 259 - } 260 - 261 - func ServeMetadata(clientID string, redirectURIs []string) http.Handler 262 - 263 - Serves: https://atcr.io/oauth/client-metadata.json 264 - 265 - 4. Token Storage (pkg/auth/oauth/storage.go) - ~50 lines 266 - 267 - type TokenStore struct { 268 - AccessToken string 269 - RefreshToken string 270 - DPoPKey *ecdsa.PrivateKey // Persist for refresh 271 - ExpiresAt time.Time 272 - } 273 - 274 - func (s *TokenStore) Save(path string) error 275 - func LoadTokenStore(path string) (*TokenStore, error) 276 - 277 - Storage location: ~/.atcr/oauth-tokens.json 278 - 279 - 5. Credential Helper (cmd/credential-helper/main.go) - ~80 lines 280 - 281 - // Docker credential helper protocol 282 - // Reads JSON from stdin, writes to stdout 283 - 284 - func main() { 285 - if len(os.Args) < 2 { 286 - os.Exit(1) 287 - } 288 - 289 - switch os.Args[1] { 290 - case "get": 291 - handleGet() // Return credentials for registry 292 - case "store": 293 - handleStore() // Store credentials 294 - case "erase": 295 - handleErase() // Remove credentials 296 - } 297 - } 298 - 299 - func handleGet() { 300 - var request struct { 301 - ServerURL string `json:"ServerURL"` 302 - } 303 - json.NewDecoder(os.Stdin).Decode(&request) 304 - 305 - // Load token from storage 306 - // Exchange for registry JWT if needed 307 - // Output: {"Username": "oauth2", "Secret": "<jwt>"} 308 - } 309 - 310 - 6. OAuth Flow (cmd/credential-helper/oauth.go) - ~60 lines 311 - 312 - func RunOAuthFlow(handle string) (*TokenStore, error) { 313 - // 1. Start local HTTP server on :8888 314 - // 2. Open browser to authorization URL 315 - // 3. Wait for callback with code 316 - // 4. Exchange code for token 317 - // 5. Save token store 318 - // 6. Return token 319 - } 320 - 321 - func startCallbackServer() (chan string, *http.Server) 322 - 323 - Complete Flow Example 324 - 325 - User runs: 326 - docker-credential-atcr configure 327 - 328 - What happens: 329 - 330 - 1. Generate DPoP key (client.go) 331 - dpopKey, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) 332 - 333 - 2. Resolve handle → DID → PDS (using our resolver) 334 - did, pds, _ := resolver.ResolveIdentity(ctx, "alice.bsky.social") 335 - 336 - 3. Discover auth server (discovery.go) 337 - metadata, _ := DiscoverAuthServer(pds) 338 - // Returns: PAR endpoint, token endpoint, etc. 339 - 340 - 4. 
Create PAR request with DPoP (client.go + go-dpop) 341 - // Generate DPoP proof for PAR endpoint 342 - claims := &dpop.ProofTokenClaims{ 343 - Method: dpop.POST, 344 - URL: metadata.PushedAuthorizationRequestEndpoint, 345 - RegisteredClaims: &jwt.RegisteredClaims{ 346 - IssuedAt: jwt.NewNumericDate(time.Now()), 347 - }, 348 - } 349 - dpopProof, _ := dpop.Create(jwt.SigningMethodES256, claims, dpopKey) 350 - 351 - // Use authelia for PAR 352 - config := &oauth2.Config{ 353 - ClientID: "https://atcr.io/oauth/client-metadata.json", 354 - Endpoint: oauth2.Endpoint{ 355 - AuthURL: metadata.AuthorizationEndpoint, 356 - TokenURL: metadata.TokenEndpoint, 357 - }, 358 - } 359 - 360 - // Create custom HTTP client that adds DPoP header 361 - client := &http.Client{ 362 - Transport: &dpopTransport{ 363 - base: http.DefaultTransport, 364 - dpopKey: dpopKey, 365 - }, 366 - } 367 - ctx := context.WithValue(context.Background(), oauth2.HTTPClient, client) 368 - 369 - // PAR request (authelia handles this) 370 - authURL, parResp, _ := config.PushedAuth(ctx, state, 371 - oauth2.SetAuthURLParam("code_challenge", pkceChallenge), 372 - oauth2.SetAuthURLParam("code_challenge_method", "S256"), 373 - ) 374 - 375 - 5. Open browser, get code (oauth.go) 376 - exec.Command("open", authURL).Run() 377 - // User authorizes 378 - // Callback: http://localhost:8888?code=xyz&state=abc 379 - 380 - 6. Exchange code for token with DPoP (client.go + go-dpop) 381 - // Generate DPoP proof for token endpoint 382 - claims := &dpop.ProofTokenClaims{ 383 - Method: dpop.POST, 384 - URL: metadata.TokenEndpoint, 385 - RegisteredClaims: &jwt.RegisteredClaims{ 386 - IssuedAt: jwt.NewNumericDate(time.Now()), 387 - }, 388 - } 389 - dpopProof, _ := dpop.Create(jwt.SigningMethodES256, claims, dpopKey) 390 - 391 - // Exchange (with DPoP header added by our transport) 392 - token, _ := config.Exchange(ctx, code, 393 - oauth2.SetAuthURLParam("code_verifier", pkceVerifier), 394 - ) 395 - 396 - 7. Save token + DPoP key (storage.go) 397 - store := &TokenStore{ 398 - AccessToken: token.AccessToken, 399 - RefreshToken: token.RefreshToken, 400 - DPoPKey: dpopKey, 401 - ExpiresAt: token.Expiry, 402 - } 403 - store.Save("~/.atcr/oauth-tokens.json") 404 - 405 - Later, when docker push happens: 406 - docker push atcr.io/alice/myapp:latest 407 - 408 - 1. Docker calls credential helper: docker-credential-atcr get 409 - 2. Helper loads stored token 410 - 3. Helper calls /auth/exchange with OAuth token → gets registry JWT 411 - 4. Returns JWT to Docker 412 - 5. 
Docker uses JWT for push 413 - 414 - Directory Structure 415 - 416 - pkg/auth/oauth/ 417 - ├── client.go # OAuth client with DPoP integration 418 - ├── discovery.go # Authorization server discovery 419 - ├── metadata.go # Client metadata server 420 - ├── storage.go # Token persistence 421 - └── transport.go # HTTP transport that adds DPoP headers 422 - 423 - cmd/credential-helper/ 424 - ├── main.go # Docker credential helper protocol 425 - ├── oauth.go # OAuth flow (browser, callback) 426 - └── config.go # Configuration 427 - 428 - go.mod additions: 429 - authelia.com/client/oauth2 v0.25.0 430 - github.com/AxisCommunications/go-dpop v1.1.2 431 - 432 - Unified Model 433 - 434 - Every hold service requires HOLD_OWNER: 435 - - Owner's PDS has the io.atcr.hold record 436 - - Owner's PDS has all io.atcr.hold.crew records 437 - - Authorization is always governed by PDS records 438 - 439 - For "public" hold (like Tangled's public knot): 440 - - Owner creates hold with public: true 441 - - Anyone can push/pull without being crew 442 - - Owner can add crew records for special privileges/tracking if desired 443 - 444 - Config has emergency override: 445 - auth: 446 - # Emergency freeze: ignore public setting, restrict to crew only 447 - # Use this to stop abuse without changing PDS records 448 - freeze: false 449 - 450 - Authorization logic: 451 - 1. Check freeze in config → if true, skip to crew check 452 - 2. Query owner's PDS for io.atcr.hold record 453 - 3. If public: true → allow all operations (unless frozen) 454 - 4. If public: false OR frozen → query io.atcr.hold.crew records, check membership 455 - 456 - Remove from config: 457 - - allow_all (replaced by public: true in PDS) 458 - - allowed_dids (replaced by crew records in PDS) 459 - 460 - This way the hold owner at atcr.io can run a public hold at hold1.atcr.io that anyone can use, but can freeze it instantly if needed without touching PDS records.
-334
docs/TESTING.md
··· 1 - # Local Testing Guide 2 - 3 - ## Quick Start 4 - 5 - ```bash 6 - ./test-local.sh 7 - ``` 8 - 9 - This automated script will: 10 - 1. Create storage directories 11 - 2. Build all binaries 12 - 3. Start both services 13 - 4. Show test commands 14 - 15 - ## Manual Testing Steps 16 - 17 - ### 1. Setup Directories 18 - 19 - ```bash 20 - sudo mkdir -p /var/lib/atcr/{blobs,hold,auth} 21 - sudo chown -R $USER:$USER /var/lib/atcr 22 - ``` 23 - 24 - ### 2. Build Binaries 25 - 26 - ```bash 27 - go build -o atcr-appview ./cmd/appview 28 - go build -o atcr-hold ./cmd/hold 29 - go build -o docker-credential-atcr ./cmd/credential-helper 30 - ``` 31 - 32 - ### 3. Configure Environment 33 - 34 - Create a `.env` file in the project root: 35 - 36 - ```bash 37 - cp .env.example .env 38 - ``` 39 - 40 - Edit `.env` with your credentials: 41 - 42 - ```env 43 - # Your ATProto handle 44 - ATPROTO_HANDLE=your-handle.bsky.social 45 - 46 - # Hold service public URL (hostname becomes the hold name) 47 - HOLD_PUBLIC_URL=http://127.0.0.1:8080 48 - 49 - # Enable OAuth registration on startup 50 - HOLD_AUTO_REGISTER=true 51 - ``` 52 - 53 - **Notes:** 54 - - Use your Bluesky handle (e.g., `alice.bsky.social`) 55 - - For localhost, use `127.0.0.1` instead of `localhost` for OAuth 56 - - The hostname from the URL becomes the hold name (e.g., `127.0.0.1` or `hold1.atcr.io`) 57 - 58 - **Load environment:** 59 - ```bash 60 - export $(cat .env | xargs) 61 - ``` 62 - 63 - ### 4. Start Services 64 - 65 - **Terminal 1 - AppView:** 66 - ```bash 67 - ./atcr-appview serve config/config.yml 68 - ``` 69 - 70 - **Terminal 2 - Hold:** 71 - ```bash 72 - ./atcr-hold config/hold.yml 73 - ``` 74 - 75 - ### 5. Start Services and OAuth Registration 76 - 77 - **Terminal 1 - AppView:** 78 - ```bash 79 - ./atcr-appview serve config/config.yml 80 - ``` 81 - 82 - **Terminal 2 - Hold (OAuth registration):** 83 - ```bash 84 - ./atcr-hold config/hold.yml 85 - ``` 86 - 87 - The hold service will start an OAuth flow. You'll see output like: 88 - 89 - ``` 90 - ================================================================================ 91 - OAUTH AUTHORIZATION REQUIRED 92 - ================================================================================ 93 - 94 - Please visit this URL to authorize the hold service: 95 - 96 - https://bsky.social/oauth/authorize?... 97 - 98 - Waiting for authorization... 99 - ================================================================================ 100 - ``` 101 - 102 - **Steps:** 103 - 1. Copy the OAuth URL from the logs 104 - 2. Open it in your browser 105 - 3. Sign in to Bluesky and authorize 106 - 4. The callback will complete automatically 107 - 5. Hold service registers in your PDS 108 - 109 - After successful OAuth, you'll see: 110 - ``` 111 - ✓ Created hold record: at://did:plc:.../io.atcr.hold/127.0.0.1 112 - ✓ Created crew record: at://did:plc:.../io.atcr.hold.crew/127.0.0.1-did:plc:... 113 - ================================================================================ 114 - REGISTRATION COMPLETE 115 - ================================================================================ 116 - Hold service is now registered and ready to use! 117 - ``` 118 - 119 - This creates two records in your PDS: 120 - - `io.atcr.hold` - Defines the storage endpoint URL 121 - - `io.atcr.hold.crew` - Grants you admin access 122 - 123 - ### 6. 
Test Docker Push/Pull 124 - 125 - **Test 1: Basic Push** 126 - ```bash 127 - # Tag an image 128 - docker tag alpine:latest localhost:5000/alice/alpine:test 129 - 130 - # Push to local registry 131 - docker push localhost:5000/alice/alpine:test 132 - ``` 133 - 134 - **Test 2: Pull** 135 - ```bash 136 - # Remove local image 137 - docker rmi localhost:5000/alice/alpine:test 138 - 139 - # Pull from registry 140 - docker pull localhost:5000/alice/alpine:test 141 - ``` 142 - 143 - **Test 3: Verify Storage** 144 - ```bash 145 - # Check manifests were stored in ATProto 146 - # (Check your PDS for io.atcr.manifest records) 147 - 148 - # Check blobs were stored locally 149 - ls -lh /var/lib/atcr/blobs/docker/registry/v2/ 150 - ``` 151 - 152 - ## OAuth Testing (Optional) 153 - 154 - ### Setup Credential Helper 155 - 156 - ```bash 157 - # Configure OAuth 158 - ./docker-credential-atcr configure 159 - 160 - # Follow the browser flow to authorize 161 - 162 - # Verify token was saved 163 - ls -la ~/.atcr/oauth-token.json 164 - ``` 165 - 166 - ### Configure Docker to Use Helper 167 - 168 - Edit `~/.docker/config.json`: 169 - ```json 170 - { 171 - "credHelpers": { 172 - "localhost:5000": "atcr" 173 - } 174 - } 175 - ``` 176 - 177 - ### Test with OAuth 178 - 179 - ```bash 180 - # Push should now use OAuth automatically 181 - docker push localhost:5000/alice/myapp:latest 182 - ``` 183 - 184 - ## Troubleshooting 185 - 186 - ### Registry won't start 187 - 188 - **Error:** `failed to create storage driver` 189 - ```bash 190 - # Check directory permissions 191 - ls -ld /var/lib/atcr/blobs 192 - # Should be owned by your user 193 - 194 - # Fix permissions 195 - sudo chown -R $USER:$USER /var/lib/atcr 196 - ``` 197 - 198 - **Error:** `address already in use` 199 - ```bash 200 - # Check what's using port 5000 201 - lsof -i :5000 202 - 203 - # Kill existing process 204 - kill $(lsof -t -i :5000) 205 - ``` 206 - 207 - ### Hold service won't start 208 - 209 - **Error:** `failed to create storage driver` 210 - ```bash 211 - # Check hold directory 212 - ls -ld /var/lib/atcr/hold 213 - sudo chown -R $USER:$USER /var/lib/atcr/hold 214 - ``` 215 - 216 - **Error:** `address already in use` 217 - ```bash 218 - # Check port 8080 219 - lsof -i :8080 220 - kill $(lsof -t -i :8080) 221 - ``` 222 - 223 - ### Docker push fails 224 - 225 - **Error:** `unauthorized: authentication required` 226 - - Check `ATPROTO_DID` and `ATPROTO_ACCESS_TOKEN` are set 227 - - Verify token is valid (not expired) 228 - - Check registry logs for auth errors 229 - 230 - **Error:** `denied: requested access to the resource is denied` 231 - - Check the identity in the image name matches your DID 232 - - Example: If your handle is `alice.bsky.social`, use: 233 - ```bash 234 - docker push localhost:5000/alice/myapp:test 235 - # NOT localhost:5000/bob/myapp:test 236 - ``` 237 - 238 - **Error:** `failed to resolve identity` 239 - - Check internet connection (needs to resolve DIDs) 240 - - Verify handle is correct 241 - - Try using DID directly instead of handle 242 - 243 - ### OAuth issues 244 - 245 - **Error:** `Failed to exchange token` 246 - - Ensure registry is running and accessible 247 - - Check `/auth/exchange` endpoint is responding 248 - - Verify OAuth token hasn't expired 249 - 250 - **Error:** `Token validation failed` 251 - - Token might be expired 252 - - Run `./docker-credential-atcr configure` again 253 - - Check PDS is accessible 254 - 255 - ## Verifying the Flow 256 - 257 - ### Check Registry is Running 258 - ```bash 259 - curl 
http://localhost:5000/v2/ 260 - # Should return: {} 261 - ``` 262 - 263 - ### Check Hold is Running 264 - ```bash 265 - curl http://localhost:8080/health 266 - # Should return: {"status":"ok"} 267 - ``` 268 - 269 - ### Check Auth Endpoint 270 - ```bash 271 - curl -v http://localhost:5000/v2/ 272 - # Should return 401 with WWW-Authenticate header 273 - ``` 274 - 275 - ### Inspect Stored Data 276 - 277 - **Manifests (in ATProto):** 278 - - Check your PDS web interface 279 - - Look for `io.atcr.manifest` collection records 280 - 281 - **Blobs (local filesystem):** 282 - ```bash 283 - # List blobs 284 - find /var/lib/atcr/blobs -type f 285 - 286 - # Check blob content (should be binary) 287 - ls -lh /var/lib/atcr/blobs/docker/registry/v2/blobs/sha256/ 288 - ``` 289 - 290 - ## Clean Up 291 - 292 - ### Stop Services 293 - ```bash 294 - # If using test script 295 - kill $(cat .atcr-pids) 296 - 297 - # Or manually 298 - pkill atcr-appview 299 - pkill atcr-hold 300 - ``` 301 - 302 - ### Remove Test Data 303 - ```bash 304 - # Remove all stored data 305 - sudo rm -rf /var/lib/atcr/* 306 - 307 - # Remove OAuth tokens 308 - rm -rf ~/.atcr/ 309 - ``` 310 - 311 - ### Reset Docker Config 312 - ```bash 313 - # Remove credential helper config 314 - # Edit ~/.docker/config.json and remove "credHelpers" section 315 - ``` 316 - 317 - ## Next Steps 318 - 319 - Once local testing works: 320 - 321 - 1. **Deploy to production:** 322 - - Use S3/Storj for blob storage 323 - - Deploy registry and hold to separate hosts 324 - - Configure DNS for `atcr.io` 325 - 326 - 2. **Enable BYOS:** 327 - - Users create `io.atcr.hold` records 328 - - Deploy their own hold service 329 - - AppView automatically routes to their storage 330 - 331 - 3. **Add monitoring:** 332 - - Registry metrics 333 - - Hold service metrics 334 - - Storage usage tracking