A container registry that uses the AT Protocol for manifest storage and S3 for blob storage. atcr.io
docker container atproto go


clean up old docs, add quotas prelim spec

+1289 -1901
-826
docs/API_KEY_MIGRATION.md
# API Key Migration Plan

## Overview

Replace the session token system (used only by the credential helper) with API keys that link to OAuth sessions. This simplifies authentication while maintaining all use cases.

## Current State

### Three Separate Auth Systems

1. **Session Tokens** (`pkg/auth/session/`)
   - JWT-like tokens: `<base64_claims>.<base64_signature>`
   - Created after OAuth callback, shown to user to copy
   - User manually pastes into credential helper config
   - Validated in `/auth/token` and `/auth/exchange`
   - 30-day TTL
   - **Problem:** Awkward UX, requires manual copy/paste

2. **UI Sessions** (`pkg/appview/session/`)
   - Cookie-based (`atcr_session`)
   - Random session ID, server-side store
   - 24-hour TTL
   - **Keep this - works well**

3. **App Password Auth** (via PDS)
   - Direct `com.atproto.server.createSession` call
   - No AppView involvement until token request
   - **Keep this - essential for non-UI users**

## Target State

### Two Auth Methods

1. **API Keys** (NEW - replaces session tokens)
   - Generated in UI after OAuth login
   - Format: `atcr_<32_bytes_base64>`
   - Linked to server-side OAuth refresh token
   - Multiple keys per user (laptop, CI/CD, etc.)
   - Revocable without re-auth

2. **App Passwords** (KEEP)
   - Direct PDS authentication
   - Works without UI/OAuth

### UI Sessions (UNCHANGED)
- Cookie-based for web UI
- Separate system, no changes needed

---

## Implementation Plan

### Phase 1: API Key System

#### 1.1 Create API Key Store (`pkg/appview/apikey/store.go`)

```go
package apikey

import (
    "crypto/rand"
    "encoding/base64"
    "encoding/json"
    "fmt"
    "os"
    "sync"
    "time"

    "golang.org/x/crypto/bcrypt"
)

// APIKey represents a user's API key
type APIKey struct {
    ID        string    `json:"id"`       // UUID
    KeyHash   string    `json:"key_hash"` // bcrypt hash
    DID       string    `json:"did"`      // Owner's DID
    Handle    string    `json:"handle"`   // Owner's handle
    Name      string    `json:"name"`     // User-provided name
    CreatedAt time.Time `json:"created_at"`
    LastUsed  time.Time `json:"last_used"`
}

// Store manages API keys
type Store struct {
    mu       sync.RWMutex
    keys     map[string]*APIKey  // keyHash -> APIKey
    byDID    map[string][]string // DID -> []keyHash
    filePath string              // /var/lib/atcr/api-keys.json
}

// NewStore creates a new API key store
func NewStore(filePath string) (*Store, error)

// Generate creates a new API key and returns the plaintext key (shown once)
func (s *Store) Generate(did, handle, name string) (key string, keyID string, err error)

// Validate checks if an API key is valid and returns the associated data
func (s *Store) Validate(key string) (*APIKey, error)

// List returns all API keys for a DID (without plaintext keys)
func (s *Store) List(did string) []*APIKey

// Delete removes an API key
func (s *Store) Delete(did, keyID string) error

// UpdateLastUsed updates the last used timestamp
func (s *Store) UpdateLastUsed(keyHash string) error
```

**Key Generation:**
```go
func (s *Store) Generate(did, handle, name string) (string, string, error) {
    // Generate 32 random bytes
    b := make([]byte, 32)
    if _, err := rand.Read(b); err != nil {
        return "", "", err
    }

    // Format: atcr_<base64>
    key := "atcr_" + base64.RawURLEncoding.EncodeToString(b)

    // Hash for storage
    keyHash, err := bcrypt.GenerateFromPassword([]byte(key), bcrypt.DefaultCost)
    if err != nil {
        return "", "", err
    }

    // Generate ID (e.g., via github.com/google/uuid's uuid.NewString)
    keyID := generateUUID()

    apiKey := &APIKey{
        ID:        keyID,
        KeyHash:   string(keyHash),
        DID:       did,
        Handle:    handle,
        Name:      name,
        CreatedAt: time.Now(),
        LastUsed:  time.Time{}, // Never used yet
    }

    s.mu.Lock()
    s.keys[string(keyHash)] = apiKey
    s.byDID[did] = append(s.byDID[did], string(keyHash))
    s.mu.Unlock()

    s.save()

    // Return plaintext key (only time it's available)
    return key, keyID, nil
}
```

**Key Validation:**
```go
func (s *Store) Validate(key string) (*APIKey, error) {
    s.mu.RLock()
    defer s.mu.RUnlock()

    // Try to match against all stored hashes
    // (one bcrypt compare per stored key: O(n), acceptable at this scale)
    for hash, apiKey := range s.keys {
        if err := bcrypt.CompareHashAndPassword([]byte(hash), []byte(key)); err == nil {
            // Update last used asynchronously
            go s.UpdateLastUsed(hash)
            return apiKey, nil
        }
    }

    return nil, fmt.Errorf("invalid API key")
}
```
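The plan declares `NewStore` and `save` but leaves them unimplemented. A minimal sketch of the JSON-file persistence they imply (the `persisted` wrapper type is an assumption; the imports used are already in the file above):

```go
// Sketch only: one possible implementation of the declared NewStore/save
// signatures, persisting the store as a single JSON file.
type persisted struct {
    Keys []*APIKey `json:"keys"`
}

func NewStore(filePath string) (*Store, error) {
    s := &Store{
        keys:     make(map[string]*APIKey),
        byDID:    make(map[string][]string),
        filePath: filePath,
    }
    data, err := os.ReadFile(filePath)
    if os.IsNotExist(err) {
        return s, nil // first run: empty store
    }
    if err != nil {
        return nil, err
    }
    var p persisted
    if err := json.Unmarshal(data, &p); err != nil {
        return nil, err
    }
    for _, k := range p.Keys {
        s.keys[k.KeyHash] = k
        s.byDID[k.DID] = append(s.byDID[k.DID], k.KeyHash)
    }
    return s, nil
}

// save writes the current key set to disk. Generate releases its lock
// before calling save, so take the read lock here.
func (s *Store) save() error {
    s.mu.RLock()
    p := persisted{}
    for _, k := range s.keys {
        p.Keys = append(p.Keys, k)
    }
    s.mu.RUnlock()

    data, err := json.MarshalIndent(p, "", "  ")
    if err != nil {
        return err
    }
    // 0600: the file contains key hashes plus account metadata
    return os.WriteFile(s.filePath, data, 0600)
}
```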
#### 1.2 Add API Key Handlers (`pkg/appview/handlers/apikeys.go`)

```go
package handlers

import (
    "encoding/json"
    "net/http"

    "github.com/gorilla/mux"

    "atcr.io/pkg/appview/apikey"
    "atcr.io/pkg/appview/middleware"
)

// GenerateAPIKeyHandler handles POST /api/keys
type GenerateAPIKeyHandler struct {
    Store *apikey.Store
}

func (h *GenerateAPIKeyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    user := middleware.GetUser(r)
    if user == nil {
        http.Error(w, "Unauthorized", http.StatusUnauthorized)
        return
    }

    name := r.FormValue("name")
    if name == "" {
        name = "Unnamed Key"
    }

    key, keyID, err := h.Store.Generate(user.DID, user.Handle, name)
    if err != nil {
        http.Error(w, "Failed to generate key", http.StatusInternalServerError)
        return
    }

    // Return key (shown once!)
    w.Header().Set("Content-Type", "application/json")
    json.NewEncoder(w).Encode(map[string]string{
        "id":  keyID,
        "key": key,
    })
}

// ListAPIKeysHandler handles GET /api/keys
type ListAPIKeysHandler struct {
    Store *apikey.Store
}

func (h *ListAPIKeysHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    user := middleware.GetUser(r)
    if user == nil {
        http.Error(w, "Unauthorized", http.StatusUnauthorized)
        return
    }

    keys := h.Store.List(user.DID)

    w.Header().Set("Content-Type", "application/json")
    json.NewEncoder(w).Encode(keys)
}

// DeleteAPIKeyHandler handles DELETE /api/keys/{id}
type DeleteAPIKeyHandler struct {
    Store *apikey.Store
}

func (h *DeleteAPIKeyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    user := middleware.GetUser(r)
    if user == nil {
        http.Error(w, "Unauthorized", http.StatusUnauthorized)
        return
    }

    vars := mux.Vars(r)
    keyID := vars["id"]

    if err := h.Store.Delete(user.DID, keyID); err != nil {
        http.Error(w, "Failed to delete key", http.StatusInternalServerError)
        return
    }

    w.WriteHeader(http.StatusNoContent)
}
```

### Phase 2: Update Token Handler

#### 2.1 Modify `/auth/token` Handler (`pkg/auth/token/handler.go`)

```go
type Handler struct {
    issuer              *Issuer
    validator           *atproto.SessionValidator
    apiKeyStore         *apikey.Store // NEW
    defaultHoldEndpoint string
}

func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    username, password, ok := r.BasicAuth()
    if !ok {
        unauthorized(w) // writes the 401 challenge
        return
    }

    var did, handle, accessToken string
    var err error

    // 1. Check if it's an API key (NEW)
    if strings.HasPrefix(password, "atcr_") {
        apiKey, err := h.apiKeyStore.Validate(password)
        if err != nil {
            fmt.Printf("DEBUG [token/handler]: API key validation failed: %v\n", err)
            unauthorized(w)
            return
        }

        did = apiKey.DID
        handle = apiKey.Handle
        fmt.Printf("DEBUG [token/handler]: API key validated for DID=%s, handle=%s\n", did, handle)

        // API key is linked to an OAuth session.
        // The OAuth refresher will provide an access token when needed via middleware.
    } else {
        // 2. Try app password (direct PDS)
        did, handle, accessToken, err = h.validator.CreateSessionAndGetToken(r.Context(), username, password)
        if err != nil {
            fmt.Printf("DEBUG [token/handler]: App password validation failed: %v\n", err)
            unauthorized(w)
            return
        }

        fmt.Printf("DEBUG [token/handler]: App password validated, DID=%s\n", did)

        // Cache access token for manifest operations
        auth.GetGlobalTokenCache().Set(did, accessToken, 2*time.Hour)

        // Ensure profile exists
        // ... existing code ...
    }

    // Rest of handler: validate access, issue JWT, etc.
    // ... existing code ...
}
```

**Key Changes:**
- Remove session token validation (`sessionManager.Validate()`)
- Add the API key check as first priority
- Keep app passwords as a fallback
- API keys use the OAuth refresher (server-side); app passwords use the token cache (client-side)

#### 2.2 Remove `/auth/exchange` Endpoint

The `/auth/exchange` endpoint was only used to exchange session tokens for registry JWTs. With API keys, it is no longer needed.

**Files to delete:**
- `pkg/auth/exchange/handler.go`

**Files to update:**
- `cmd/appview/serve.go` - Remove exchange handler registration

### Phase 3: Update UI

#### 3.1 Add API Keys Section to Settings Page

**Template** (`pkg/appview/templates/settings.html`):

```html
<!-- Add after existing profile settings -->
<section class="api-keys">
  <h2>API Keys</h2>
  <p>Generate API keys for Docker CLI and CI/CD. Each key is linked to your OAuth session.</p>

  <!-- Generate New Key -->
  <div class="generate-key">
    <h3>Generate New API Key</h3>
    <form id="generate-key-form">
      <input type="text" id="key-name" placeholder="Key name (e.g., My Laptop)" required>
      <button type="submit">Generate Key</button>
    </form>
  </div>

  <!-- Key Generated Modal (shown once) -->
  <div id="key-modal" class="modal hidden">
    <div class="modal-content">
      <h3>✓ API Key Generated!</h3>
      <p><strong>Copy this key now - it won't be shown again:</strong></p>
      <div class="key-display">
        <code id="generated-key"></code>
        <button onclick="copyKey()">Copy to Clipboard</button>
      </div>
      <div class="usage-instructions">
        <h4>Using with Docker:</h4>
        <pre>docker login atcr.io -u <span class="handle">{{.Profile.Handle}}</span> -p <span class="key-placeholder">[paste key here]</span></pre>
      </div>
      <button onclick="closeModal()">Done</button>
    </div>
  </div>

  <!-- Existing Keys List -->
  <div class="keys-list">
    <h3>Your API Keys</h3>
    <table>
      <thead>
        <tr>
          <th>Name</th>
          <th>Created</th>
          <th>Last Used</th>
          <th>Actions</th>
        </tr>
      </thead>
      <tbody id="keys-table">
        <!-- Populated via JavaScript -->
      </tbody>
    </table>
  </div>
</section>

<script>
// Generate key
document.getElementById('generate-key-form').addEventListener('submit', async (e) => {
  e.preventDefault();
  const name = document.getElementById('key-name').value;

  const resp = await fetch('/api/keys', {
    method: 'POST',
    headers: {'Content-Type': 'application/x-www-form-urlencoded'},
    body: `name=${encodeURIComponent(name)}`
  });

  const data = await resp.json();

  // Show key in modal (only time it's available)
  document.getElementById('generated-key').textContent = data.key;
  document.getElementById('key-modal').classList.remove('hidden');

  // Refresh keys list
  loadKeys();
});

// Copy key to clipboard
function copyKey() {
  const key = document.getElementById('generated-key').textContent;
  navigator.clipboard.writeText(key);
  alert('Copied to clipboard!');
}

// Close the generated-key modal
function closeModal() {
  document.getElementById('key-modal').classList.add('hidden');
}

// Load existing keys
async function loadKeys() {
  const resp = await fetch('/api/keys');
  const keys = await resp.json();

  const tbody = document.getElementById('keys-table');
  tbody.innerHTML = keys.map(key => `
    <tr>
      <td>${key.name}</td>
      <td>${new Date(key.created_at).toLocaleDateString()}</td>
      <td>${key.last_used ? new Date(key.last_used).toLocaleDateString() : 'Never'}</td>
      <td><button onclick="deleteKey('${key.id}')">Revoke</button></td>
    </tr>
  `).join('');
}

// Delete key
async function deleteKey(id) {
  if (!confirm('Are you sure you want to revoke this key?')) return;

  await fetch(`/api/keys/${id}`, { method: 'DELETE' });
  loadKeys();
}

// Load keys on page load
loadKeys();
</script>

<style>
.modal.hidden { display: none; }
.modal {
  position: fixed;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  background: rgba(0,0,0,0.5);
  display: flex;
  align-items: center;
  justify-content: center;
}
.modal-content {
  background: white;
  padding: 2rem;
  border-radius: 8px;
  max-width: 600px;
}
.key-display {
  background: #f5f5f5;
  padding: 1rem;
  margin: 1rem 0;
  border-radius: 4px;
}
.key-display code {
  word-break: break-all;
  font-size: 14px;
}
.usage-instructions {
  margin-top: 1rem;
  padding: 1rem;
  background: #e3f2fd;
  border-radius: 4px;
}
.usage-instructions pre {
  background: #263238;
  color: #aed581;
  padding: 1rem;
  border-radius: 4px;
  overflow-x: auto;
}
.handle { color: #ffab40; }
.key-placeholder { color: #64b5f6; }
</style>
```

#### 3.2 Register API Key Routes (`cmd/appview/serve.go`)

```go
// In initializeUI() function, add:

// API key management routes (authenticated)
authRouter.Handle("/api/keys", &uihandlers.GenerateAPIKeyHandler{
    Store: apiKeyStore,
}).Methods("POST")

authRouter.Handle("/api/keys", &uihandlers.ListAPIKeysHandler{
    Store: apiKeyStore,
}).Methods("GET")

authRouter.Handle("/api/keys/{id}", &uihandlers.DeleteAPIKeyHandler{
    Store: apiKeyStore,
}).Methods("DELETE")
```

### Phase 4: Update Credential Helper

#### 4.1 Simplify Configuration (`cmd/credential-helper/main.go`)

```go
// SessionStore becomes CredentialStore
type CredentialStore struct {
    Handle     string `json:"handle"`
    APIKey     string `json:"api_key"`
    AppViewURL string `json:"appview_url"`
}

func handleConfigure(handle string) {
    fmt.Println("ATCR Credential Helper Configuration")
    fmt.Println("=====================================")
    fmt.Println()
    fmt.Println("You need an API key from the ATCR web UI.")
    fmt.Println()

    appViewURL := os.Getenv("ATCR_APPVIEW_URL")
    if appViewURL == "" {
        appViewURL = defaultAppViewURL
    }

    // Auto-open settings page
    settingsURL := appViewURL + "/settings"
    fmt.Printf("Opening settings page: %s\n", settingsURL)
    fmt.Println("Log in and generate an API key if you haven't already.")
    fmt.Println()

    if err := oauth.OpenBrowser(settingsURL); err != nil {
        fmt.Printf("Could not open browser. Please visit: %s\n\n", settingsURL)
    }

    // Prompt for credentials
    if handle == "" {
        fmt.Print("Enter your ATProto handle (e.g., alice.bsky.social): ")
        fmt.Scanln(&handle)
    } else {
        fmt.Printf("Using handle: %s\n", handle)
    }

    fmt.Print("Enter your API key (from settings page): ")
    var apiKey string
    fmt.Scanln(&apiKey)

    // Validate key format
    if !strings.HasPrefix(apiKey, "atcr_") {
        fmt.Fprintf(os.Stderr, "Invalid API key format. Key should start with 'atcr_'\n")
        os.Exit(1)
    }

    // Save credentials
    creds := &CredentialStore{
        Handle:     handle,
        APIKey:     apiKey,
        AppViewURL: appViewURL,
    }

    if err := saveCredentials(getCredentialsPath(), creds); err != nil {
        fmt.Fprintf(os.Stderr, "Error saving credentials: %v\n", err)
        os.Exit(1)
    }

    fmt.Println()
    fmt.Println("✓ Configuration complete!")
    fmt.Println("You can now use docker push/pull with atcr.io")
}

func handleGet() {
    var serverURL string
    fmt.Fscanln(os.Stdin, &serverURL)

    // Load credentials
    creds, err := loadCredentials(getCredentialsPath())
    if err != nil {
        fmt.Fprintf(os.Stderr, "Error loading credentials: %v\n", err)
        fmt.Fprintf(os.Stderr, "Please run: docker-credential-atcr configure\n")
        os.Exit(1)
    }

    // Return credentials for Docker.
    // Docker will send these as Basic Auth to /auth/token.
    response := Credentials{
        ServerURL: serverURL,
        Username:  creds.Handle,
        Secret:    creds.APIKey, // API key as password
    }

    json.NewEncoder(os.Stdout).Encode(response)
}
```

**File Rename:**
- `~/.atcr/session.json` → `~/.atcr/credentials.json`
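The `saveCredentials`/`loadCredentials` helpers referenced above are not spelled out in the plan. A minimal sketch, assuming `path/filepath` is also imported and 0600 permissions since the file now holds an API key:

```go
// Sketch: JSON round-trip for the credentials file.
func saveCredentials(path string, creds *CredentialStore) error {
    if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
        return err
    }
    data, err := json.MarshalIndent(creds, "", "  ")
    if err != nil {
        return err
    }
    // 0600: the file contains the plaintext API key
    return os.WriteFile(path, data, 0600)
}

func loadCredentials(path string) (*CredentialStore, error) {
    data, err := os.ReadFile(path)
    if err != nil {
        return nil, err
    }
    var creds CredentialStore
    if err := json.Unmarshal(data, &creds); err != nil {
        return nil, err
    }
    return &creds, nil
}
```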
### Phase 5: Remove Session Token System

#### 5.1 Delete Session Token Files

**Files to delete:**
- `pkg/auth/session/handler.go`
- `pkg/auth/exchange/handler.go`

#### 5.2 Update OAuth Server (`pkg/auth/oauth/server.go`)

**Remove session token creation:**
```go
// OLD (delete this):
sessionToken, err := s.sessionManager.Create(did, handle)
if err != nil {
    s.renderError(w, fmt.Sprintf("Failed to create session token: %v", err))
    return
}

// Check if this is a UI login...
if cookie, err := r.Cookie("oauth_return_to"); err == nil && s.uiSessionStore != nil {
    // UI flow...
} else {
    // Render success page with session token (for credential helper)
    s.renderSuccess(w, sessionToken, handle)
}
```

**NEW (replace with):**
```go
// Check if this is a UI login
if cookie, err := r.Cookie("oauth_return_to"); err == nil && s.uiSessionStore != nil {
    // Create UI session
    uiSessionID, err := s.uiSessionStore.Create(did, handle, sessionData.HostURL, 24*time.Hour)
    // ... set cookie, redirect ...
} else {
    // Non-UI flow: redirect to settings to get an API key
    s.renderRedirectToSettings(w, handle)
}
```

**Add redirect to settings template:**
```go
func (s *Server) renderRedirectToSettings(w http.ResponseWriter, handle string) {
    tmpl := template.Must(template.New("redirect").Parse(`
<!DOCTYPE html>
<html>
<head>
    <title>Authorization Successful - ATCR</title>
    <meta http-equiv="refresh" content="3;url=/settings">
</head>
<body>
    <h1>✓ Authorization Successful!</h1>
    <p>Redirecting to settings page to generate your API key...</p>
    <p>If not redirected, <a href="/settings">click here</a>.</p>
</body>
</html>
`))
    w.Header().Set("Content-Type", "text/html")
    tmpl.Execute(w, nil)
}
```

#### 5.3 Update Server Constructor

```go
// Remove sessionManager parameter
func NewServer(app *App) *Server {
    return &Server{
        app: app,
    }
}
```

#### 5.4 Update Registry Initialization (`cmd/appview/serve.go`)

```go
// REMOVE session manager creation:
// sessionManager, err := session.NewManagerWithPersistentSecret(secretPath, 30*24*time.Hour)

// Create API key store
apiKeyStorePath := filepath.Join(filepath.Dir(storagePath), "api-keys.json")
apiKeyStore, err := apikey.NewStore(apiKeyStorePath)
if err != nil {
    return fmt.Errorf("failed to create API key store: %w", err)
}

// OAuth server doesn't need the session manager anymore
oauthServer := oauth.NewServer(oauthApp)
oauthServer.SetRefresher(refresher)
if uiSessionStore != nil {
    oauthServer.SetUISessionStore(uiSessionStore)
}

// Token handler gets the API key store instead of the session manager
if issuer != nil {
    tokenHandler := token.NewHandler(issuer, apiKeyStore, defaultHoldEndpoint)
    tokenHandler.RegisterRoutes(mux)

    // Exchange handler registration removed (no longer needed)
}
```

---

## Migration Path

### For Existing Users

**Option 1: Smooth Migration (Recommended)**
1. Keep session token validation temporarily, with a deprecation warning (see the sketch below)
2. When a session token is used, log a warning and return a special response header
3. The Docker client shows the warning: "Session tokens deprecated, please regenerate API key"
4. Remove session token support in the next major version

**Option 2: Hard Cutover**
1. Deploy the new version with API keys
2. Session tokens stop working immediately
3. Users must reconfigure: `docker-credential-atcr configure`
4. Cleaner, but disruptive
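A minimal sketch of Option 1's transitional branch in the token handler, assuming the legacy `sessionManager` stays wired in during the migration window (the header text and the claims shape are illustrative, not decided):

```go
// Transitional: accept legacy session tokens, but flag them as deprecated.
// `claims` stands in for whatever sessionManager.Validate returns today.
if claims, err := h.sessionManager.Validate(password); err == nil {
    did = claims.DID
    handle = claims.Handle

    log.Printf("WARN [token/handler]: deprecated session token used by %s", did)

    // The OCI distribution spec reserves RFC 7234 Warning headers
    // (warn-code 299) for registry warnings that clients may surface.
    w.Header().Set("Warning",
        `299 - "Session tokens are deprecated; generate an API key in Settings"`)
}
```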
### Rollout Plan

**Week 1: Deploy API Keys**
- Add API key system
- Keep session token validation
- Add deprecation notice to OAuth callback

**Week 2-4: Migration Period**
- Monitor API key adoption
- Email users about migration
- Provide migration guide

**Week 5: Remove Session Tokens**
- Delete session token code
- Force users to API keys

---

## Testing Plan

### Unit Tests

1. **API Key Store** (see the sketch below)
   - Test key generation (format, uniqueness)
   - Test key validation (correct/incorrect keys)
   - Test bcrypt hashing
   - Test key listing/deletion

2. **Token Handler**
   - Test API key authentication
   - Test app password authentication
   - Test invalid credentials
   - Test key format validation
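As a starting point for the store tests in item 1, a round-trip against the proposed `apikey` API (temp file via `t.TempDir()`):

```go
package apikey

import (
    "path/filepath"
    "strings"
    "testing"
)

func TestGenerateAndValidate(t *testing.T) {
    store, err := NewStore(filepath.Join(t.TempDir(), "api-keys.json"))
    if err != nil {
        t.Fatal(err)
    }

    key, keyID, err := store.Generate("did:plc:alice123", "alice.bsky.social", "laptop")
    if err != nil {
        t.Fatal(err)
    }
    if !strings.HasPrefix(key, "atcr_") {
        t.Errorf("key %q does not have atcr_ prefix", key)
    }
    if keyID == "" {
        t.Error("expected non-empty key ID")
    }

    // The plaintext key must validate back to the same owner.
    got, err := store.Validate(key)
    if err != nil {
        t.Fatalf("Validate: %v", err)
    }
    if got.DID != "did:plc:alice123" {
        t.Errorf("got DID %q", got.DID)
    }

    // A tampered key must be rejected.
    if _, err := store.Validate(key + "x"); err == nil {
        t.Error("expected tampered key to fail validation")
    }
}
```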
### Integration Tests

1. **Full Auth Flow**
   - UI login → OAuth → API key generation
   - Credential helper → API key → registry JWT
   - App password → registry JWT

2. **Docker Client Tests**
   - `docker login -u handle -p api_key`
   - `docker login -u handle -p app_password`
   - `docker push` with API key
   - `docker pull` with API key

### Security Tests

1. **Key Security**
   - Verify bcrypt hashing (not plaintext storage)
   - Test key shown only once
   - Test key revocation
   - Test unauthorized key access

2. **OAuth Security**
   - Verify API key links to correct OAuth session
   - Test expired refresh token handling
   - Test multiple keys for same user

---

## Files Changed

### New Files
- `pkg/appview/apikey/store.go` - API key storage and validation
- `pkg/appview/handlers/apikeys.go` - API key HTTP handlers
- `docs/API_KEY_MIGRATION.md` - This document

### Modified Files
- `pkg/auth/token/handler.go` - Add API key validation, remove session tokens
- `pkg/auth/oauth/server.go` - Remove session token creation, redirect to settings
- `pkg/appview/handlers/settings.go` - Add API key management UI
- `pkg/appview/templates/settings.html` - Add API key section
- `cmd/credential-helper/main.go` - Simplify to use API keys
- `cmd/appview/serve.go` - Initialize API key store, remove session manager

### Deleted Files
- `pkg/auth/session/handler.go` - Session token system
- `pkg/auth/exchange/handler.go` - Exchange endpoint (no longer needed)

---

## Advantages

✅ **Simpler Auth:** Two methods instead of three (API keys + app passwords)
✅ **Better UX:** No manual copy/paste of session tokens
✅ **Multiple Keys:** Users can have a laptop key, a CI key, etc.
✅ **Revocable:** Revoke individual keys without re-auth
✅ **Server-Side OAuth:** Refresh tokens stay on the server, not in client files
✅ **Familiar Pattern:** Matches AWS ECR, GitHub tokens, etc.

## Backward Compatibility

⚠️ **Breaking Change:** Session tokens will stop working
✅ **App passwords:** Still work (no changes)
✅ **UI sessions:** Still work (separate system)

**Migration Required:** Users with session tokens must run `docker-credential-atcr configure` again to get API keys.
-281
docs/OAUTH.md
# ATCR OAuth Implementation

## Overview

ATCR now supports ATProto OAuth authentication via Docker credential helpers. This allows users to authenticate with their ATProto identity (Bluesky account) and use Docker push/pull commands seamlessly.

## Architecture

### Components

1. **OAuth Client** (`pkg/auth/oauth/`)
   - Full ATProto OAuth implementation with DPoP support
   - Uses `authelia.com/client/oauth2` for OAuth + PAR
   - Uses `github.com/AxisCommunications/go-dpop` for DPoP proof generation
   - Automatic authorization server discovery
   - PKCE support for security

2. **Credential Helper** (`cmd/credential-helper/`)
   - Standalone binary: `docker-credential-atcr`
   - Implements the Docker credential helper protocol
   - Manages the OAuth flow with the browser
   - Stores tokens securely in `~/.atcr/oauth-token.json`

3. **Registry Integration**
   - `/auth/exchange` endpoint exchanges OAuth tokens for registry JWTs
   - Existing `/auth/token` endpoint for standard Docker auth

## Dependencies

- `authelia.com/client/oauth2` - OAuth client with PAR support (2⭐, Authelia-backed)
- `github.com/AxisCommunications/go-dpop` - DPoP implementation (10⭐, RFC 9449 compliant)
- `github.com/golang-jwt/jwt/v5` - JWT library (transitive, 11k+⭐)

## Usage

### Setup

1. Build the credential helper:
   ```bash
   go build -o docker-credential-atcr ./cmd/credential-helper
   ```

2. Install it in your PATH:
   ```bash
   sudo mv docker-credential-atcr /usr/local/bin/
   ```

3. Configure Docker to use it by editing `~/.docker/config.json`:
   ```json
   {
     "credsStore": "atcr"
   }
   ```

### Configuration

Run the OAuth flow:
```bash
docker-credential-atcr configure
```

This will:
1. Prompt for your ATProto handle (e.g., `alice.bsky.social`)
2. Open your browser for OAuth authorization
3. Store the OAuth token and DPoP key in `~/.atcr/oauth-token.json`

### Using with Docker

Once configured, use Docker normally:

```bash
# Push an image
docker push atcr.io/alice/myapp:latest

# Pull an image
docker pull atcr.io/alice/myapp:latest
```

The credential helper automatically:
1. Loads your stored OAuth token
2. Refreshes it if expired
3. Exchanges it for a registry JWT
4. Provides the JWT to Docker

## How It Works

### OAuth Flow

1. **User runs** `docker-credential-atcr configure`
2. **Resolve identity**: alice.bsky.social → DID → PDS endpoint
3. **Discover auth server**: GET `{pds}/.well-known/oauth-authorization-server`
4. **Generate DPoP key**: ECDSA P-256 key pair
5. **PAR request**: POST to PAR endpoint with DPoP header + PKCE challenge
6. **Open browser**: User authorizes on their PDS
7. **Receive code**: Callback to `localhost:8888/callback`
8. **Exchange code**: POST to token endpoint with DPoP header + PKCE verifier
9. **Save tokens**: Store OAuth token + DPoP key + DID/handle

### Docker Push/Pull Flow

1. **Docker needs credentials** for `atcr.io`
2. **Calls credential helper**: `docker-credential-atcr get`
3. **Helper loads token** from `~/.atcr/oauth-token.json`
4. **Refresh if needed**: Uses refresh token + DPoP if expired
5. **Exchange for registry JWT**: POST to `/auth/exchange` with OAuth token + handle
6. **Registry validates token**: Calls `getSession` on the PDS to validate the token
7. **Registry issues JWT**: Creates a registry JWT with the validated DID/handle
8. **Return to Docker**: `{"Username": "oauth2", "Secret": "<jwt>"}`
9. **Docker uses JWT**: For authentication to the registry API

## Security

### DPoP (Demonstrating Proof-of-Possession)

Every OAuth request includes a DPoP proof:
- Unique JWT signed with the ECDSA private key
- Contains HTTP method, URL, timestamp, nonce
- Public key (JWK) included in the JWT header
- Binds the token to the specific client
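For intuition about what such a proof contains, here is an illustrative, self-contained sketch built directly on the `golang-jwt/jwt/v5` dependency listed above. This is not the `go-dpop` library's API, and a correct JWK must left-pad the coordinates to exactly 32 bytes:

```go
package main

import (
    "crypto/ecdsa"
    "crypto/elliptic"
    "crypto/rand"
    "encoding/base64"
    "fmt"
    "time"

    "github.com/golang-jwt/jwt/v5"
)

func b64(b []byte) string { return base64.RawURLEncoding.EncodeToString(b) }

// randomID returns a unique jti for each proof.
func randomID() string {
    b := make([]byte, 16)
    rand.Read(b)
    return b64(b)
}

// buildDPoPProof sketches the structure described above: an ES256 JWT whose
// header carries typ "dpop+jwt" plus the public JWK, and whose claims bind it
// to one HTTP request (htm/htu), a timestamp (iat), and the server nonce.
func buildDPoPProof(key *ecdsa.PrivateKey, method, url, nonce string) (string, error) {
    tok := jwt.NewWithClaims(jwt.SigningMethodES256, jwt.MapClaims{
        "jti":   randomID(),
        "htm":   method,
        "htu":   url,
        "iat":   jwt.NewNumericDate(time.Now()),
        "nonce": nonce,
    })
    tok.Header["typ"] = "dpop+jwt"
    tok.Header["jwk"] = map[string]string{
        "kty": "EC",
        "crv": "P-256",
        // NOTE: sketch only - X/Y must be left-padded to 32 bytes in real code.
        "x": b64(key.PublicKey.X.Bytes()),
        "y": b64(key.PublicKey.Y.Bytes()),
    }
    return tok.SignedString(key)
}

func main() {
    key, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
    proof, _ := buildDPoPProof(key, "POST", "https://pds.example/oauth/par", "server-nonce")
    fmt.Println(proof)
}
```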
### PKCE (Proof Key for Code Exchange)

- Code verifier generated locally
- Code challenge sent in the authorization request
- Verifier sent in the token exchange
- Prevents authorization code interception

### Token Storage

- Tokens stored in `~/.atcr/oauth-token.json`
- File permissions: 0600 (owner read/write only)
- DPoP key stored in PEM format
- Refresh tokens for long-term access

## Implementation Details

### Code Structure

```
pkg/auth/oauth/
├── client.go      # OAuth client with DPoP
├── discovery.go   # Authorization server discovery
├── metadata.go    # Client metadata document
├── storage.go     # Token persistence
└── transport.go   # DPoP HTTP transport

pkg/auth/atproto/
├── session.go     # ATProto session validation (Basic auth)
└── validator.go   # OAuth token validation via getSession

cmd/credential-helper/
├── main.go        # Docker credential helper protocol
├── oauth.go       # OAuth flow orchestration
└── token.go       # Token management

pkg/auth/exchange/
└── handler.go     # OAuth → Registry JWT exchange
```

### Key Classes

**OAuth Client** (`pkg/auth/oauth/client.go`)
- `NewClient()` - Create a client with a DPoP key
- `InitializeForHandle()` - Discover the auth server
- `AuthorizeURL()` - Generate the authorization URL with PAR + PKCE
- `Exchange()` - Exchange the code for a token with DPoP
- `RefreshToken()` - Refresh an expired token with DPoP

**DPoP Transport** (`pkg/auth/oauth/transport.go`)
- Implements `http.RoundTripper`
- Automatically adds a DPoP header to all requests
- Handles nonce management and retries
- Used by the OAuth client for all HTTP requests

**Token Store** (`pkg/auth/oauth/storage.go`)
- Persists OAuth tokens and the DPoP key
- PEM encoding for the private key
- Expiration checking
- Secure file permissions

**Token Validator** (`pkg/auth/atproto/validator.go`)
- `ValidateToken()` - Validate a token via PDS getSession
- `ValidateTokenWithResolver()` - Auto-resolve the PDS from a handle
- Returns the validated DID and handle
- Used by the registry to verify OAuth tokens

## Testing

### Manual Testing

1. Configure the helper:
   ```bash
   ./docker-credential-atcr configure
   # Enter handle: alice.bsky.social
   # Browser opens for authorization
   # Token saved to ~/.atcr/oauth-token.json
   ```

2. Test credential retrieval:
   ```bash
   echo '{"ServerURL": "atcr.io"}' | ./docker-credential-atcr get
   # Should return: {"Username":"oauth2","Secret":"<jwt>"}
   ```

3. Test with Docker:
   ```bash
   docker push atcr.io/alice/test:latest
   ```

### Integration Testing

TODO: Add automated tests for:
- OAuth flow with a mock PDS
- DPoP proof generation
- Token exchange
- Credential helper protocol

## Security Features

### OAuth Token Validation

The registry validates ATProto OAuth tokens by calling `com.atproto.server.getSession` on the user's PDS. This ensures:
- The token is valid and not expired
- The token belongs to the claimed user
- The user's DID and handle are extracted from the PDS response
- No trust in client-provided identity information

**Flow:**
1. Client sends OAuth token + handle to `/auth/exchange`
2. Registry resolves handle → PDS endpoint
3. Registry calls `{pds}/xrpc/com.atproto.server.getSession` with the token
4. PDS validates the token and returns session info (DID, handle)
5. Registry uses the validated DID/handle to issue the registry JWT
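A bare-bones version of steps 3-4, assuming a plain Bearer token for clarity (the real validator also has DPoP binding to contend with):

```go
import (
    "context"
    "encoding/json"
    "fmt"
    "net/http"
)

// getSession sketch: validate a token by asking the PDS who it belongs to.
func getSession(ctx context.Context, pdsEndpoint, token string) (did, handle string, err error) {
    req, err := http.NewRequestWithContext(ctx, "GET",
        pdsEndpoint+"/xrpc/com.atproto.server.getSession", nil)
    if err != nil {
        return "", "", err
    }
    req.Header.Set("Authorization", "Bearer "+token)

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        return "", "", err
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return "", "", fmt.Errorf("getSession: status %d", resp.StatusCode)
    }

    var session struct {
        DID    string `json:"did"`
        Handle string `json:"handle"`
    }
    if err := json.NewDecoder(resp.Body).Decode(&session); err != nil {
        return "", "", err
    }
    // Identity comes from the PDS response, never from the client.
    return session.DID, session.Handle, nil
}
```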
## Future Improvements

1. **Token refresh in background**
   - Proactively refresh before expiry
   - Reduce latency on Docker commands

2. **Multiple account support**
   - Store tokens for multiple handles
   - Allow selecting which account to use

3. **Revocation support**
   - Implement token revocation
   - Clean up on logout

4. **Better error messages**
   - User-friendly OAuth error handling
   - Guide users through common issues

## Troubleshooting

### "Failed to resolve identity"
- Check your internet connection
- Verify the handle is correct (e.g., `alice.bsky.social`)
- Ensure the PDS is accessible

### "Authorization timed out"
- Complete authorization within 5 minutes
- Check whether the browser opened correctly
- Try running `configure` again

### "Token expired"
- The credential helper should auto-refresh
- If the problem persists, run `configure` again
- Check `~/.atcr/oauth-token.json` permissions

### "Failed to exchange token"
- Ensure the registry is running
- Check that the `/auth/exchange` endpoint is accessible
- Verify the token hasn't been revoked

## References

- [ATProto OAuth Specification](https://atproto.com/specs/oauth)
- [RFC 9449: DPoP](https://datatracker.ietf.org/doc/html/rfc9449)
- [RFC 9126: PAR](https://datatracker.ietf.org/doc/html/rfc9126)
- [RFC 7636: PKCE](https://datatracker.ietf.org/doc/html/rfc7636)
- [Docker Credential Helpers](https://github.com/docker/docker-credential-helpers)
+1289
docs/QUOTAS.md
# ATCR Quota System

This document describes ATCR's storage quota implementation, inspired by Harbor's proven approach to per-project blob tracking with deduplication.

## Table of Contents

- [Overview](#overview)
- [Harbor's Approach (Reference Implementation)](#harbors-approach-reference-implementation)
- [Storage Options](#storage-options)
- [Quota Data Model](#quota-data-model)
- [Push Flow (Detailed)](#push-flow-detailed)
- [Delete Flow](#delete-flow)
- [Garbage Collection](#garbage-collection)
- [Quota Reconciliation](#quota-reconciliation)
- [Configuration](#configuration)
- [Trade-offs & Design Decisions](#trade-offs--design-decisions)
- [Future Enhancements](#future-enhancements)

## Overview

ATCR implements per-user storage quotas to:
1. **Limit storage consumption** on shared hold services
2. **Track actual S3 costs** (what new data was added)
3. **Benefit from deduplication** (users only pay once per layer)
4. **Provide transparency** (show users their storage usage)

**Key principle:** Users pay for layers they've uploaded, but only ONCE per layer, regardless of how many images reference it.

### Example Scenario

```
Alice pushes myapp:v1 (layers A, B, C - each 100MB)
→ Alice's quota: +300MB (all new layers)

Alice pushes myapp:v2 (layers A, B, D)
→ Layers A, B already claimed by Alice
→ Layer D is new (100MB)
→ Alice's quota: +100MB (only D is new)
→ Total: 400MB

Bob pushes his-app:latest (layers A, E)
→ Layer A already exists in S3 (uploaded by Alice)
→ Bob claims it for the first time → +100MB to Bob's quota
→ Layer E is new → +100MB to Bob's quota
→ Bob's quota: 200MB

Physical S3 storage: 500MB (A, B, C, D, E)
Claimed storage: 600MB (Alice: 400MB, Bob: 200MB)
Deduplication savings: 100MB (layer A shared)
```

## Harbor's Approach (Reference Implementation)

Harbor is built on distribution/distribution (same as ATCR) and implements quotas as middleware. Their approach:

### Key Insights from Harbor

1. **"Shared blobs are only computed once per project"**
   - Each project tracks which blobs it has uploaded
   - The same blob used in multiple images counts only once per project
   - Different projects claiming the same blob each pay for it

2. **Quota checked when the manifest is pushed**
   - Blobs upload first (presigned URLs, can't intercept)
   - The manifest is pushed last → the quota check happens here
   - Can reject the manifest if the quota is exceeded (orphaned blobs are cleaned by GC)

3. **Middleware-based implementation**
   - distribution/distribution has NO built-in quota support
   - Harbor added it as request preprocessing middleware
   - Uses a database (PostgreSQL) or Redis for quota storage

4. **Per-project ownership model**
   - Blobs are physically deduplicated globally
   - Quota accounting is logical (per-project claims)
   - Total claimed storage can exceed physical storage

### References

- Harbor Quota Documentation: https://goharbor.io/docs/1.10/administration/configure-project-quotas/
- Harbor Source: https://github.com/goharbor/harbor (see `src/controller/quota`)

## Storage Options

The hold service needs to store quota data somewhere. Two options:
### Option 1: S3-Based Storage (Recommended for BYOS)

Store quota metadata alongside blobs in the same S3 bucket:

```
Bucket structure:
/docker/registry/v2/blobs/sha256/ab/abc123.../data   ← actual blobs
/atcr/quota/did:plc:alice.json                       ← quota tracking
/atcr/quota/did:plc:bob.json
```

**Pros:**
- ✅ No separate database needed
- ✅ Single S3 bucket (better UX - no second bucket to configure)
- ✅ Quota data lives with the blobs
- ✅ Hold service stays relatively stateless
- ✅ Works with any S3-compatible service (Storj, Minio, Upcloud, Fly.io)

**Cons:**
- ❌ Slower than a local database (network round-trip)
- ❌ Eventual consistency issues
- ❌ Race conditions on concurrent updates
- ❌ Extra S3 API costs (GET/PUT per upload)

**Performance:**
- Each blob upload: 1 HEAD (blob exists?) + 1 GET (quota) + 1 PUT (update quota)
- Typical latency: 100-200ms total overhead
- For high-throughput registries, consider SQLite

### Option 2: SQLite Database (Recommended for Shared Holds)

Local database in the hold service:

```bash
/var/lib/atcr/hold-quota.db
```

**Pros:**
- ✅ Fast local queries (no network latency)
- ✅ ACID transactions (no race conditions)
- ✅ Efficient for high-throughput registries
- ✅ Can use foreign keys and joins

**Cons:**
- ❌ Makes the hold service stateful (persistent volume needed)
- ❌ Not ideal for ephemeral BYOS deployments
- ❌ Backup/restore complexity
- ❌ Multi-instance scaling requires a shared database

**Schema:**
```sql
CREATE TABLE user_quotas (
    did         TEXT PRIMARY KEY,
    quota_limit INTEGER NOT NULL DEFAULT 10737418240, -- 10GB
    quota_used  INTEGER NOT NULL DEFAULT 0,
    updated_at  TIMESTAMP
);

CREATE TABLE claimed_layers (
    did        TEXT NOT NULL,
    digest     TEXT NOT NULL,
    size       INTEGER NOT NULL,
    claimed_at TIMESTAMP,
    PRIMARY KEY (did, digest)
);
```
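Against this schema, claim-and-check can be a single transaction. A sketch using `database/sql` (the driver choice, e.g. `mattn/go-sqlite3`, and passing `limit` as a parameter instead of reading the `quota_limit` column are simplifying assumptions):

```go
import (
    "database/sql"
    "fmt"
)

// ClaimLayer atomically charges a layer to a user, returning the quota
// impact (0 if already claimed) or an error if the limit would be exceeded.
func ClaimLayer(db *sql.DB, did, digest string, size, limit int64) (int64, error) {
    tx, err := db.Begin()
    if err != nil {
        return 0, err
    }
    defer tx.Rollback() // no-op after a successful Commit

    // Already claimed? Then this upload is free for the user.
    var n int
    if err := tx.QueryRow(
        `SELECT COUNT(*) FROM claimed_layers WHERE did = ? AND digest = ?`,
        did, digest).Scan(&n); err != nil {
        return 0, err
    }
    if n > 0 {
        return 0, tx.Commit()
    }

    var used int64
    err = tx.QueryRow(
        `SELECT quota_used FROM user_quotas WHERE did = ?`, did).Scan(&used)
    if err == sql.ErrNoRows {
        used = 0 // first upload for this user
    } else if err != nil {
        return 0, err
    }

    if used+size > limit {
        return 0, fmt.Errorf("quota exceeded: used=%d, impact=%d, limit=%d", used, size, limit)
    }

    if _, err := tx.Exec(
        `INSERT INTO claimed_layers (did, digest, size, claimed_at)
         VALUES (?, ?, ?, CURRENT_TIMESTAMP)`, did, digest, size); err != nil {
        return 0, err
    }
    if _, err := tx.Exec(
        `INSERT INTO user_quotas (did, quota_used, updated_at)
         VALUES (?, ?, CURRENT_TIMESTAMP)
         ON CONFLICT(did) DO UPDATE SET quota_used = quota_used + ?,
                                        updated_at = CURRENT_TIMESTAMP`,
        did, size, size); err != nil {
        return 0, err
    }
    return size, tx.Commit()
}
```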
### Recommendation

- **BYOS (user-owned holds):** S3-based (keeps the hold service ephemeral)
- **Shared holds (multi-user):** SQLite (better performance and consistency)
- **High-traffic production:** SQLite or PostgreSQL (Harbor uses this)

## Quota Data Model

### Quota File Format (S3-based)

```json
{
  "did": "did:plc:alice123",
  "limit": 10737418240,
  "used": 5368709120,
  "claimed_layers": {
    "sha256:abc123...": 104857600,
    "sha256:def456...": 52428800,
    "sha256:789ghi...": 209715200
  },
  "last_updated": "2025-10-09T12:34:56Z",
  "version": 1
}
```

**Fields:**
- `did`: User's ATProto DID
- `limit`: Maximum storage in bytes (default: 10GB)
- `used`: Current storage usage in bytes (sum of claimed_layers)
- `claimed_layers`: Map of digest → size for all layers the user has uploaded
- `last_updated`: Timestamp of the last quota update
- `version`: Schema version for future migrations
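In Go, the file maps onto the `Quota` struct that the pseudocode later in this document assumes:

```go
// Quota mirrors the JSON quota file above, one per user per hold service.
type Quota struct {
    DID           string           `json:"did"`
    Limit         int64            `json:"limit"`
    Used          int64            `json:"used"`
    ClaimedLayers map[string]int64 `json:"claimed_layers"` // digest -> size
    LastUpdated   time.Time        `json:"last_updated"`
    Version       int              `json:"version"`
}
```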
### Why Track Individual Layers?

**Q: Can't we just track a counter?**

**A: We need layer tracking for:**

1. **Deduplication detection**
   - Check if the user already claimed a layer → free upload
   - Example: Updating an image reuses most layers

2. **Accurate deletes**
   - When a manifest is deleted, only decrement unclaimed layers
   - A user may have 5 images sharing layer A - deleting 1 image doesn't free layer A

3. **Quota reconciliation**
   - Verify the quota matches reality by listing the user's manifests
   - Recalculate from layers in manifests vs. the claimed_layers map

4. **Auditing**
   - "Show me what I'm storing"
   - Users can see which layers consume their quota

## Push Flow (Detailed)

### Step-by-Step: User Pushes Image

```
┌──────────┐                 ┌──────────┐                 ┌──────────┐
│  Client  │                 │   Hold   │                 │    S3    │
│ (Docker) │                 │ Service  │                 │  Bucket  │
└──────────┘                 └──────────┘                 └──────────┘
     │                            │                            │
     │ 1. PUT /v2/.../blobs/      │                            │
     │    upload?digest=sha256:abc│                            │
     ├───────────────────────────>│                            │
     │                            │                            │
     │                            │ 2. Check if blob exists    │
     │                            │    (Stat/HEAD request)     │
     │                            ├───────────────────────────>│
     │                            │<───────────────────────────┤
     │                            │    200 OK (exists) or      │
     │                            │    404 Not Found           │
     │                            │                            │
     │                            │ 3. Read user quota         │
     │                            │    GET /atcr/quota/{did}   │
     │                            ├───────────────────────────>│
     │                            │<───────────────────────────┤
     │                            │    quota.json              │
     │                            │                            │
     │                            │ 4. Calculate quota impact  │
     │                            │    - If digest in          │
     │                            │      claimed_layers: 0     │
     │                            │    - Else: size            │
     │                            │                            │
     │                            │ 5. Check quota limit       │
     │                            │    used + impact <= limit? │
     │                            │                            │
     │                            │ 6. Update quota            │
     │                            │    PUT /atcr/quota/{did}   │
     │                            ├───────────────────────────>│
     │                            │<───────────────────────────┤
     │                            │    200 OK                  │
     │                            │                            │
     │ 7. Presigned URL           │                            │
     │<───────────────────────────┤                            │
     │    {url: "https://s3..."}  │                            │
     │                            │                            │
     │ 8. Upload blob to S3       │                            │
     ├────────────────────────────┼───────────────────────────>│
     │                            │                            │
     │ 9. 200 OK                  │                            │
     │<───────────────────────────┼────────────────────────────┤
     │                            │                            │
```
### Implementation (Pseudocode)

```go
// cmd/hold/main.go - HandlePutPresignedURL

func (s *HoldService) HandlePutPresignedURL(w http.ResponseWriter, r *http.Request) {
    var req PutPresignedURLRequest
    json.NewDecoder(r.Body).Decode(&req)

    // Step 1: Check if the blob already exists in S3
    blobPath := fmt.Sprintf("/docker/registry/v2/blobs/%s/%s/%s/data",
        algorithm, digest[:2], digest)

    _, err := s.driver.Stat(ctx, blobPath)
    blobExists := (err == nil)

    // Step 2: Read the quota from S3 (or SQLite)
    quota, err := s.quotaManager.GetQuota(req.DID)
    if err != nil {
        // First upload - create a quota with defaults
        quota = &Quota{
            DID:           req.DID,
            Limit:         s.config.QuotaDefaultLimit,
            Used:          0,
            ClaimedLayers: make(map[string]int64),
        }
    }

    // Step 3: Calculate the quota impact
    quotaImpact := req.Size // Default: assume a new layer

    if _, alreadyClaimed := quota.ClaimedLayers[req.Digest]; alreadyClaimed {
        // User already uploaded this layer before
        quotaImpact = 0
        log.Printf("Layer %s already claimed by %s, no quota impact",
            req.Digest, req.DID)
    } else if blobExists {
        // Blob exists in S3 (uploaded by another user),
        // but this user is claiming it for the first time.
        // It still counts against their quota.
        log.Printf("Layer %s exists globally but new to %s, quota impact: %d",
            req.Digest, req.DID, quotaImpact)
    } else {
        // Brand new blob - will be uploaded to S3
        log.Printf("New layer %s for %s, quota impact: %d",
            req.Digest, req.DID, quotaImpact)
    }

    // Step 4: Check the quota limit
    if quota.Used+quotaImpact > quota.Limit {
        http.Error(w, fmt.Sprintf(
            "quota exceeded: used=%d, impact=%d, limit=%d",
            quota.Used, quotaImpact, quota.Limit,
        ), http.StatusPaymentRequired) // 402
        return
    }

    // Step 5: Update the quota (optimistic - before the upload completes)
    quota.Used += quotaImpact
    if quotaImpact > 0 {
        quota.ClaimedLayers[req.Digest] = req.Size
    }
    quota.LastUpdated = time.Now()

    if err := s.quotaManager.SaveQuota(quota); err != nil {
        http.Error(w, "failed to update quota", http.StatusInternalServerError)
        return
    }

    // Step 6: Generate the presigned URL
    presignedURL, err := s.getUploadURL(ctx, req.Digest, req.Size, req.DID)
    if err != nil {
        // Roll back the quota update on error
        // (only if this request actually claimed the layer)
        if quotaImpact > 0 {
            quota.Used -= quotaImpact
            delete(quota.ClaimedLayers, req.Digest)
            s.quotaManager.SaveQuota(quota)
        }

        http.Error(w, "failed to generate presigned URL", http.StatusInternalServerError)
        return
    }

    // Step 7: Return the presigned URL + quota info
    resp := PutPresignedURLResponse{
        URL:       presignedURL,
        ExpiresAt: time.Now().Add(15 * time.Minute),
        QuotaInfo: QuotaInfo{
            Used:           quota.Used,
            Limit:          quota.Limit,
            Available:      quota.Limit - quota.Used,
            Impact:         quotaImpact,
            AlreadyClaimed: quotaImpact == 0,
        },
    }

    w.Header().Set("Content-Type", "application/json")
    json.NewEncoder(w).Encode(resp)
}
```

### Race Condition Handling

**Problem:** Two concurrent uploads of the same blob

```
Time    User A                     User B
0ms     Upload layer X (100MB)
10ms                               Upload layer X (100MB)
20ms    Check exists: NO           Check exists: NO
30ms    Quota impact: 100MB        Quota impact: 100MB
40ms    Update quota A: +100MB     Update quota B: +100MB
50ms    Generate presigned URL     Generate presigned URL
100ms   Upload to S3 completes     Upload to S3 (overwrites A's)
```

**Result:** Both users are charged 100MB, but only 100MB is stored in S3.

**Mitigation strategies:**

1. **Accept eventual consistency** (recommended for S3-based)
   - Run periodic reconciliation to fix discrepancies
   - A small inconsistency window (minutes) is acceptable
   - Reconciliation uses the PDS as the source of truth

2. **Optimistic locking** (S3 ETags)
   ```go
   // Use S3 ETags for conditional writes
   oldETag := getQuotaFileETag(did)
   err := putQuotaFileWithCondition(quota, oldETag)
   if err == PreconditionFailed {
       // Retry with a fresh read
   }
   ```

3. **Database transactions** (SQLite-based)
   ```sql
   -- SQLite: BEGIN IMMEDIATE takes the write lock up front
   -- (SELECT ... FOR UPDATE is PostgreSQL syntax, not SQLite)
   BEGIN IMMEDIATE;
   SELECT quota_used FROM user_quotas WHERE did = ?;
   UPDATE user_quotas SET quota_used = quota_used + ? WHERE did = ?;
   COMMIT;
   ```
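Fleshing out strategy 2, the usual shape is a compare-and-swap retry loop. `GetQuotaWithETag`, `SaveQuotaIfMatch`, and `ErrPreconditionFailed` are hypothetical names for the conditional GET/PUT plumbing:

```go
// Sketch: optimistic-concurrency loop around the quota file.
// GetQuotaWithETag / SaveQuotaIfMatch are hypothetical manager methods that
// would map to an S3 GET (capturing the ETag) and a conditional PUT.
func (s *HoldService) chargeQuota(did, digest string, size int64) error {
    const maxRetries = 5
    for attempt := 0; attempt < maxRetries; attempt++ {
        quota, etag, err := s.quotaManager.GetQuotaWithETag(did)
        if err != nil {
            return err
        }
        if _, claimed := quota.ClaimedLayers[digest]; claimed {
            return nil // already claimed: nothing to write
        }
        if quota.Used+size > quota.Limit {
            return fmt.Errorf("quota exceeded")
        }

        quota.Used += size
        quota.ClaimedLayers[digest] = size
        quota.LastUpdated = time.Now()

        err = s.quotaManager.SaveQuotaIfMatch(quota, etag)
        if err == nil {
            return nil
        }
        if !errors.Is(err, ErrPreconditionFailed) {
            return err
        }
        // Lost the race: another writer updated the file; re-read and retry.
    }
    return fmt.Errorf("quota update contention: gave up after %d retries", maxRetries)
}
```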
## Delete Flow

### Manifest Deletion via AppView UI

When a user deletes a manifest through the AppView web interface:

```
┌──────────┐          ┌──────────┐          ┌──────────┐          ┌──────────┐
│   User   │          │ AppView  │          │   Hold   │          │   PDS    │
│    UI    │          │ Database │          │ Service  │          │          │
└──────────┘          └──────────┘          └──────────┘          └──────────┘
     │                     │                     │                     │
     │ DELETE manifest     │                     │                     │
     ├────────────────────>│                     │                     │
     │                     │                     │                     │
     │                     │ 1. Get manifest     │                     │
     │                     │    and layers       │                     │
     │                     │                     │                     │
     │                     │ 2. Check which      │                     │
     │                     │    layers are still │                     │
     │                     │    referenced by    │                     │
     │                     │    the user's other │                     │
     │                     │    manifests        │                     │
     │                     │                     │                     │
     │                     │ 3. DELETE manifest  │                     │
     │                     │    from PDS         │                     │
     │                     ├─────────────────────┼────────────────────>│
     │                     │                     │                     │
     │                     │ 4. POST /quota/decrement                  │
     │                     ├────────────────────>│                     │
     │                     │    {layers: [...]}  │                     │
     │                     │                     │                     │
     │                     │                     │ 5. Update quota     │
     │                     │                     │    Remove unclaimed │
     │                     │                     │    layers           │
     │                     │                     │                     │
     │                     │ 6. 200 OK           │                     │
     │                     │<────────────────────┤                     │
     │                     │                     │                     │
     │                     │ 7. Delete from DB   │                     │
     │                     │                     │                     │
     │ 8. Success          │                     │                     │
     │<────────────────────┤                     │                     │
     │                     │                     │                     │
```

### AppView Implementation

```go
// pkg/appview/handlers/manifest.go

func (h *ManifestHandler) DeleteManifest(w http.ResponseWriter, r *http.Request) {
    did := r.Context().Value("auth.did").(string)
    repository := chi.URLParam(r, "repository")
    digest := chi.URLParam(r, "digest")

    // Step 1: Get the manifest and its layers from the database
    manifest, err := db.GetManifest(h.db, digest)
    if err != nil {
        http.Error(w, "manifest not found", 404)
        return
    }

    layers, err := db.GetLayersForManifest(h.db, manifest.ID)
    if err != nil {
        http.Error(w, "failed to get layers", 500)
        return
    }

    // Step 2: For each layer, check whether the user still references it
    // in other manifests
    layersToDecrement := []LayerInfo{}

    for _, layer := range layers {
        // Query: does this user have other manifests using this layer?
        stillReferenced, err := db.CheckLayerReferencedByUser(
            h.db, did, repository, layer.Digest, manifest.ID,
        )
        if err != nil {
            http.Error(w, "failed to check layer references", 500)
            return
        }

        if !stillReferenced {
            // This layer is no longer used by the user
            layersToDecrement = append(layersToDecrement, LayerInfo{
                Digest: layer.Digest,
                Size:   layer.Size,
            })
        }
    }

    // Step 3: Delete the manifest from the user's PDS
    atprotoClient := atproto.NewClient(manifest.PDSEndpoint, did, accessToken)
    err = atprotoClient.DeleteRecord(ctx, atproto.ManifestCollection, manifestRKey)
    if err != nil {
        http.Error(w, "failed to delete from PDS", 500)
        return
    }

    // Step 4: Notify the hold service to decrement the quota
    if len(layersToDecrement) > 0 {
        holdClient := &http.Client{}

        decrementReq := QuotaDecrementRequest{
            DID:    did,
            Layers: layersToDecrement,
        }

        body, _ := json.Marshal(decrementReq)
        resp, err := holdClient.Post(
            manifest.HoldEndpoint+"/quota/decrement",
            "application/json",
            bytes.NewReader(body),
        )

        // Note: resp is nil when err != nil, so check err first.
        if err != nil {
            log.Printf("Warning: failed to update quota on hold service: %v", err)
            // Continue anyway - GC reconciliation will fix it
        } else if resp.StatusCode != 200 {
            resp.Body.Close()
            log.Printf("Warning: hold service returned %d on quota decrement", resp.StatusCode)
        }
    }

    // Step 5: Delete from the AppView database
    err = db.DeleteManifest(h.db, did, repository, digest)
    if err != nil {
        http.Error(w, "failed to delete from database", 500)
        return
    }

    w.WriteHeader(http.StatusNoContent)
}
```

### Hold Service Decrement Endpoint

```go
// cmd/hold/main.go

type QuotaDecrementRequest struct {
    DID    string      `json:"did"`
    Layers []LayerInfo `json:"layers"`
}

type LayerInfo struct {
    Digest string `json:"digest"`
    Size   int64  `json:"size"`
}

func (s *HoldService) HandleQuotaDecrement(w http.ResponseWriter, r *http.Request) {
    var req QuotaDecrementRequest
    if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
        http.Error(w, "invalid request", 400)
        return
    }

    // Read the current quota
    quota, err := s.quotaManager.GetQuota(req.DID)
    if err != nil {
        http.Error(w, "quota not found", 404)
        return
    }

    // Decrement the quota for each layer
    for _, layer := range req.Layers {
        if size, claimed := quota.ClaimedLayers[layer.Digest]; claimed {
            // Remove from claimed layers
            delete(quota.ClaimedLayers, layer.Digest)
            quota.Used -= size

            log.Printf("Decremented quota for %s: layer %s (%d bytes)",
                req.DID, layer.Digest, size)
        } else {
            log.Printf("Warning: layer %s not in claimed_layers for %s",
                layer.Digest, req.DID)
        }
    }

    // Ensure quota.Used doesn't go negative (defensive)
    if quota.Used < 0 {
        log.Printf("Warning: quota.Used went negative for %s, resetting to 0", req.DID)
        quota.Used = 0
    }

    // Save the updated quota
    quota.LastUpdated = time.Now()
    if err := s.quotaManager.SaveQuota(quota); err != nil {
        http.Error(w, "failed to save quota", 500)
        return
    }

    // Return updated quota info
    json.NewEncoder(w).Encode(map[string]any{
        "used":  quota.Used,
        "limit": quota.Limit,
    })
}
```
### SQL Query: Check Layer References

```sql
-- pkg/appview/db/queries.go

-- Check if the user still references this layer in other manifests
SELECT COUNT(*)
FROM layers l
JOIN manifests m ON l.manifest_id = m.id
WHERE m.did = ?      -- User's DID
  AND l.digest = ?   -- Layer digest
  AND m.id != ?      -- Exclude the manifest being deleted
```

## Garbage Collection

### Background: Orphaned Blobs

Orphaned blobs accumulate when:
1. A manifest push fails after the blobs were uploaded (presigned URLs bypass the hold)
2. The quota is exceeded - the manifest is rejected, but the blobs are already in S3
3. A user deletes a manifest - the blobs are no longer referenced

**GC periodically cleans these up.**

### GC Cron Implementation

Similar to the AppView's backfill worker, the hold service can run periodic GC:

```go
// cmd/hold/gc/gc.go

type GarbageCollector struct {
    driver       storagedriver.StorageDriver
    appviewURL   string
    holdURL      string
    quotaManager *quota.Manager
}

// Run garbage collection
func (gc *GarbageCollector) Run(ctx context.Context) error {
    log.Println("Starting garbage collection...")

    // Step 1: Get the list of referenced blobs from the AppView
    referenced, err := gc.getReferencedBlobs()
    if err != nil {
        return fmt.Errorf("failed to get referenced blobs: %w", err)
    }

    referencedSet := make(map[string]bool)
    for _, digest := range referenced {
        referencedSet[digest] = true
    }

    log.Printf("AppView reports %d referenced blobs", len(referenced))

    // Step 2: Walk the S3 blobs
    deletedCount := 0
    reclaimedBytes := int64(0)

    err = gc.driver.Walk(ctx, "/docker/registry/v2/blobs", func(fileInfo storagedriver.FileInfo) error {
        if fileInfo.IsDir() {
            return nil // Skip directories
        }

        // Extract the digest from the path
        // Path: /docker/registry/v2/blobs/sha256/ab/abc123.../data
        digest := extractDigestFromPath(fileInfo.Path())

        if !referencedSet[digest] {
            // Unreferenced blob - delete it
            size := fileInfo.Size()

            if err := gc.driver.Delete(ctx, fileInfo.Path()); err != nil {
                log.Printf("Failed to delete blob %s: %v", digest, err)
                return nil // Continue anyway
            }

            deletedCount++
            reclaimedBytes += size

            log.Printf("GC: Deleted unreferenced blob %s (%d bytes)", digest, size)
        }

        return nil
    })

    if err != nil {
        return fmt.Errorf("failed to walk blobs: %w", err)
    }

    log.Printf("GC complete: deleted %d blobs, reclaimed %d bytes",
        deletedCount, reclaimedBytes)

    return nil
}

// Get the referenced blobs from the AppView
func (gc *GarbageCollector) getReferencedBlobs() ([]string, error) {
    // Query the AppView for all blobs referenced by manifests
    // stored in THIS hold service.
    // (Named endpoint to avoid shadowing the url package.)
    endpoint := fmt.Sprintf("%s/internal/blobs/referenced?hold=%s",
        gc.appviewURL, url.QueryEscape(gc.holdURL))

    resp, err := http.Get(endpoint)
    if err != nil {
        return nil, err
    }
    defer resp.Body.Close()

    var result struct {
        Blobs []string `json:"blobs"`
    }

    if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
        return nil, err
    }

    return result.Blobs, nil
}
```
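`extractDigestFromPath` is referenced but never defined in the plan. One plausible sketch for the path layout shown in the comment (imports `path` and `strings`; assumes the AppView returns digests in `algorithm:hex` form):

```go
// extractDigestFromPath recovers "sha256:<hex>" from a distribution blob
// path like /docker/registry/v2/blobs/sha256/ab/abc123.../data
func extractDigestFromPath(p string) string {
    parts := strings.Split(path.Clean(p), "/")
    // ... blobs/<algorithm>/<prefix>/<hex>/data
    if len(parts) < 4 || parts[len(parts)-1] != "data" {
        return ""
    }
    algorithm := parts[len(parts)-4]
    hex := parts[len(parts)-2]
    return algorithm + ":" + hex
}
```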
723 + 724 + ### AppView Internal API 725 + 726 + ```go 727 + // pkg/appview/handlers/internal.go 728 + 729 + // Get all referenced blobs for a specific hold 730 + func (h *InternalHandler) GetReferencedBlobs(w http.ResponseWriter, r *http.Request) { 731 + holdEndpoint := r.URL.Query().Get("hold") 732 + if holdEndpoint == "" { 733 + http.Error(w, "missing hold parameter", 400) 734 + return 735 + } 736 + 737 + // Query database for all layers in manifests stored in this hold 738 + query := ` 739 + SELECT DISTINCT l.digest 740 + FROM layers l 741 + JOIN manifests m ON l.manifest_id = m.id 742 + WHERE m.hold_endpoint = ? 743 + ` 744 + 745 + rows, err := h.db.Query(query, holdEndpoint) 746 + if err != nil { 747 + http.Error(w, "database error", 500) 748 + return 749 + } 750 + defer rows.Close() 751 + 752 + blobs := []string{} 753 + for rows.Next() { 754 + var digest string 755 + if err := rows.Scan(&digest); err != nil { 756 + continue 757 + } 758 + blobs = append(blobs, digest) 759 + } 760 + 761 + json.NewEncoder(w).Encode(map[string]any{ 762 + "blobs": blobs, 763 + "count": len(blobs), 764 + "hold": holdEndpoint, 765 + }) 766 + } 767 + ``` 768 + 769 + ### GC Cron Schedule 770 + 771 + ```go 772 + // cmd/hold/main.go 773 + 774 + func main() { 775 + // ... service setup ... 776 + 777 + // Start GC cron if enabled 778 + if os.Getenv("GC_ENABLED") == "true" { 779 + gcInterval := 24 * time.Hour // Daily by default 780 + 781 + go func() { 782 + ticker := time.NewTicker(gcInterval) 783 + defer ticker.Stop() 784 + 785 + for range ticker.C { 786 + if err := garbageCollector.Run(context.Background()); err != nil { 787 + log.Printf("GC error: %v", err) 788 + } 789 + } 790 + }() 791 + 792 + log.Printf("GC cron started: runs every %v", gcInterval) 793 + } 794 + 795 + // Start server... 796 + } 797 + ``` 798 + 799 + ## Quota Reconciliation 800 + 801 + ### PDS as Source of Truth 802 + 803 + **Key insight:** Manifest records in PDS are publicly readable (no OAuth needed for reads). 804 + 805 + Each manifest contains: 806 + - Repository name 807 + - Digest 808 + - Layers array with digest + size 809 + - Hold endpoint 810 + 811 + The hold service can query the PDS to calculate the user's true quota: 812 + 813 + ``` 814 + 1. List all io.atcr.manifest records for user 815 + 2. Filter manifests where holdEndpoint == this hold service 816 + 3. Extract unique layers (deduplicate by digest) 817 + 4. Sum layer sizes = true quota usage 818 + 5. Compare to quota file 819 + 6. 
Fix discrepancies 820 + ``` 821 + 822 + ### Implementation 823 + 824 + ```go 825 + // cmd/hold/quota/reconcile.go 826 + 827 + type Reconciler struct { 828 + quotaManager *Manager 829 + atprotoResolver *atproto.Resolver 830 + holdURL string 831 + } 832 + 833 + // ReconcileUser recalculates quota from PDS manifests 834 + func (r *Reconciler) ReconcileUser(ctx context.Context, did string) error { 835 + log.Printf("Reconciling quota for %s", did) 836 + 837 + // Step 1: Resolve user's PDS endpoint 838 + identity, err := r.atprotoResolver.ResolveIdentity(ctx, did) 839 + if err != nil { 840 + return fmt.Errorf("failed to resolve DID: %w", err) 841 + } 842 + 843 + // Step 2: Create unauthenticated ATProto client 844 + // (manifest records are public - no OAuth needed) 845 + client := atproto.NewClient(identity.PDSEndpoint, did, "") 846 + 847 + // Step 3: List all manifest records for this user 848 + manifests, err := client.ListRecords(ctx, atproto.ManifestCollection, 1000) 849 + if err != nil { 850 + return fmt.Errorf("failed to list manifests: %w", err) 851 + } 852 + 853 + // Step 4: Filter manifests stored in THIS hold service 854 + // and extract unique layers 855 + uniqueLayers := make(map[string]int64) // digest -> size 856 + 857 + for _, record := range manifests { 858 + var manifest atproto.ManifestRecord 859 + if err := json.Unmarshal(record.Value, &manifest); err != nil { 860 + log.Printf("Warning: failed to parse manifest: %v", err) 861 + continue 862 + } 863 + 864 + // Only count manifests stored in this hold 865 + if manifest.HoldEndpoint != r.holdURL { 866 + continue 867 + } 868 + 869 + // Add config blob 870 + if manifest.Config.Digest != "" { 871 + uniqueLayers[manifest.Config.Digest] = manifest.Config.Size 872 + } 873 + 874 + // Add layer blobs 875 + for _, layer := range manifest.Layers { 876 + uniqueLayers[layer.Digest] = layer.Size 877 + } 878 + } 879 + 880 + // Step 5: Calculate true quota usage 881 + trueUsage := int64(0) 882 + for _, size := range uniqueLayers { 883 + trueUsage += size 884 + } 885 + 886 + log.Printf("User %s true usage from PDS: %d bytes (%d unique layers)", 887 + did, trueUsage, len(uniqueLayers)) 888 + 889 + // Step 6: Compare with current quota file 890 + quota, err := r.quotaManager.GetQuota(did) 891 + if err != nil { 892 + log.Printf("No existing quota for %s, creating new", did) 893 + quota = &Quota{ 894 + DID: did, 895 + Limit: r.quotaManager.DefaultLimit, 896 + ClaimedLayers: make(map[string]int64), 897 + } 898 + } 899 + 900 + // Step 7: Fix discrepancies 901 + if quota.Used != trueUsage || len(quota.ClaimedLayers) != len(uniqueLayers) { 902 + log.Printf("Quota mismatch for %s: recorded=%d, actual=%d (diff=%d)", 903 + did, quota.Used, trueUsage, trueUsage - quota.Used) 904 + 905 + // Update quota to match PDS truth 906 + quota.Used = trueUsage 907 + quota.ClaimedLayers = uniqueLayers 908 + quota.LastUpdated = time.Now() 909 + 910 + if err := r.quotaManager.SaveQuota(quota); err != nil { 911 + return fmt.Errorf("failed to save reconciled quota: %w", err) 912 + } 913 + 914 + log.Printf("Reconciled quota for %s: %d bytes", did, trueUsage) 915 + } else { 916 + log.Printf("Quota for %s is accurate", did) 917 + } 918 + 919 + return nil 920 + } 921 + 922 + // ReconcileAll reconciles all users (run periodically) 923 + func (r *Reconciler) ReconcileAll(ctx context.Context) error { 924 + // Get list of all users with quota files 925 + users, err := r.quotaManager.ListUsers() 926 + if err != nil { 927 + return err 928 + } 929 + 930 + log.Printf("Starting 
reconciliation for %d users", len(users)) 931 + 932 + for _, did := range users { 933 + if err := r.ReconcileUser(ctx, did); err != nil { 934 + log.Printf("Failed to reconcile %s: %v", did, err) 935 + // Continue with other users 936 + } 937 + } 938 + 939 + log.Println("Reconciliation complete") 940 + return nil 941 + } 942 + ``` 943 + 944 + ### Reconciliation Cron 945 + 946 + ```go 947 + // cmd/hold/main.go 948 + 949 + func main() { 950 + // ... setup ... 951 + 952 + // Start reconciliation cron 953 + if os.Getenv("QUOTA_RECONCILE_ENABLED") == "true" { 954 + reconcileInterval := 24 * time.Hour // Daily 955 + 956 + go func() { 957 + ticker := time.NewTicker(reconcileInterval) 958 + defer ticker.Stop() 959 + 960 + for range ticker.C { 961 + if err := reconciler.ReconcileAll(context.Background()); err != nil { 962 + log.Printf("Reconciliation error: %v", err) 963 + } 964 + } 965 + }() 966 + 967 + log.Printf("Quota reconciliation cron started: runs every %v", reconcileInterval) 968 + } 969 + 970 + // ... start server ... 971 + } 972 + ``` 973 + 974 + ### Why PDS as Source of Truth Works 975 + 976 + 1. **Manifests are canonical** - If manifest exists in PDS, user owns those layers 977 + 2. **Public reads** - No OAuth needed, just resolve DID → PDS endpoint 978 + 3. **ATProto durability** - PDS is user's authoritative data store 979 + 4. **AppView is cache** - AppView database might lag or have inconsistencies 980 + 5. **Reconciliation fixes drift** - Periodic sync from PDS ensures accuracy 981 + 982 + **Example reconciliation scenarios:** 983 + 984 + - **Orphaned quota entries:** User deleted manifest from PDS, but hold quota still has it 985 + → Reconciliation removes from claimed_layers 986 + 987 + - **Missing quota entries:** User pushed manifest, but quota update failed 988 + → Reconciliation adds to claimed_layers 989 + 990 + - **Race condition duplicates:** Two concurrent pushes double-counted a layer 991 + → Reconciliation fixes to actual usage 992 + 993 + ## Configuration 994 + 995 + ### Hold Service Environment Variables 996 + 997 + ```bash 998 + # .env.hold 999 + 1000 + # ============================================================================ 1001 + # Quota Configuration 1002 + # ============================================================================ 1003 + 1004 + # Enable quota enforcement 1005 + QUOTA_ENABLED=true 1006 + 1007 + # Default quota limit per user (bytes) 1008 + # 10GB = 10737418240 1009 + # 50GB = 53687091200 1010 + # 100GB = 107374182400 1011 + QUOTA_DEFAULT_LIMIT=10737418240 1012 + 1013 + # Storage backend for quota data 1014 + # Options: s3, sqlite 1015 + QUOTA_STORAGE_BACKEND=s3 1016 + 1017 + # For S3-based storage: 1018 + # Quota files stored in same bucket as blobs 1019 + QUOTA_STORAGE_PREFIX=/atcr/quota/ 1020 + 1021 + # For SQLite-based storage: 1022 + QUOTA_DB_PATH=/var/lib/atcr/hold-quota.db 1023 + 1024 + # ============================================================================ 1025 + # Garbage Collection 1026 + # ============================================================================ 1027 + 1028 + # Enable periodic garbage collection 1029 + GC_ENABLED=true 1030 + 1031 + # GC interval (default: 24h) 1032 + GC_INTERVAL=24h 1033 + 1034 + # AppView URL for GC reference checking 1035 + APPVIEW_URL=https://atcr.io 1036 + 1037 + # ============================================================================ 1038 + # Quota Reconciliation 1039 + # ============================================================================ 1040 + 1041 + # Enable 
quota reconciliation from PDS 1042 + QUOTA_RECONCILE_ENABLED=true 1043 + 1044 + # Reconciliation interval (default: 24h) 1045 + QUOTA_RECONCILE_INTERVAL=24h 1046 + 1047 + # ============================================================================ 1048 + # Hold Service Identity (Required) 1049 + # ============================================================================ 1050 + 1051 + # Public URL of this hold service 1052 + HOLD_PUBLIC_URL=https://hold1.example.com 1053 + 1054 + # Owner DID (for auto-registration) 1055 + HOLD_OWNER=did:plc:xyz123 1056 + ``` 1057 + 1058 + ### AppView Configuration 1059 + 1060 + ```bash 1061 + # .env.appview 1062 + 1063 + # Internal API endpoint for hold services 1064 + # Used for GC reference checking 1065 + ATCR_INTERNAL_API_ENABLED=true 1066 + 1067 + # Optional: authentication token for internal APIs 1068 + ATCR_INTERNAL_API_TOKEN=secret123 1069 + ``` 1070 + 1071 + ## Trade-offs & Design Decisions 1072 + 1073 + ### 1. Claimed Storage vs Physical Storage 1074 + 1075 + **Decision:** Track claimed storage (logical accounting) 1076 + 1077 + **Why:** 1078 + - Predictable for users: "you pay for what you upload" 1079 + - No complex cross-user dependencies 1080 + - Delete always gives you quota back 1081 + - Matches Harbor's proven model 1082 + 1083 + **Trade-off:** 1084 + - Total claimed can exceed physical storage 1085 + - Users might complain "I uploaded 10GB but S3 only has 6GB" 1086 + 1087 + **Mitigation:** 1088 + - Show deduplication savings metric 1089 + - Educate users: "You claimed 10GB, but deduplication saved 4GB" 1090 + 1091 + ### 2. S3 vs SQLite for Quota Storage 1092 + 1093 + **Decision:** Support both, recommend based on use case 1094 + 1095 + **S3 Pros:** 1096 + - No database to manage 1097 + - Quota data lives with blobs 1098 + - Better for ephemeral BYOS 1099 + 1100 + **SQLite Pros:** 1101 + - Faster (no network) 1102 + - ACID transactions (no race conditions) 1103 + - Better for high-traffic shared holds 1104 + 1105 + **Trade-off:** 1106 + - S3: eventual consistency, race conditions 1107 + - SQLite: stateful service, scaling challenges 1108 + 1109 + **Mitigation:** 1110 + - Reconciliation fixes S3 inconsistencies 1111 + - SQLite can use shared DB for multi-instance 1112 + 1113 + ### 3. Optimistic Quota Update 1114 + 1115 + **Decision:** Update quota BEFORE upload completes 1116 + 1117 + **Why:** 1118 + - Prevent race conditions (two users uploading simultaneously) 1119 + - Can reject before presigned URL generated 1120 + - Simpler flow 1121 + 1122 + **Trade-off:** 1123 + - If upload fails, quota already incremented (user "paid" for nothing) 1124 + 1125 + **Mitigation:** 1126 + - Reconciliation from PDS fixes orphaned quota entries 1127 + - Acceptable for MVP (upload failures are rare) 1128 + 1129 + ### 4. AppView as Intermediary 1130 + 1131 + **Decision:** AppView notifies hold service on deletes 1132 + 1133 + **Why:** 1134 + - AppView already has manifest/layer database 1135 + - Can efficiently check if layer still referenced 1136 + - Hold service doesn't need to query PDS on every delete 1137 + 1138 + **Trade-off:** 1139 + - AppView → Hold dependency 1140 + - Network hop on delete 1141 + 1142 + **Mitigation:** 1143 + - If notification fails, reconciliation fixes quota 1144 + - Eventually consistent is acceptable 1145 + 1146 + ### 5. 
PDS as Source of Truth 1147 + 1148 + **Decision:** Use PDS manifests for reconciliation 1149 + 1150 + **Why:** 1151 + - Manifests in PDS are canonical user data 1152 + - Public reads (no OAuth for reconciliation) 1153 + - AppView database might lag or be inconsistent 1154 + 1155 + **Trade-off:** 1156 + - Reconciliation requires PDS queries (slower) 1157 + - Limited to 1000 manifests per query 1158 + 1159 + **Mitigation:** 1160 + - Run reconciliation daily (not real-time) 1161 + - Paginate if user has >1000 manifests 1162 + 1163 + ## Future Enhancements 1164 + 1165 + ### 1. Quota API Endpoints 1166 + 1167 + ``` 1168 + GET /quota/usage - Get current user's quota 1169 + GET /quota/breakdown - Get storage by repository 1170 + POST /quota/limit - Update user's quota limit (admin) 1171 + GET /quota/stats - Get hold-wide statistics 1172 + ``` 1173 + 1174 + ### 2. Quota Alerts 1175 + 1176 + Notify users when approaching limit: 1177 + - Email/webhook at 80%, 90%, 95% 1178 + - Reject uploads at 100% (currently implemented) 1179 + - Grace period: allow 105% temporarily 1180 + 1181 + ### 3. Tiered Quotas 1182 + 1183 + Different limits based on user tier: 1184 + - Free: 10GB 1185 + - Pro: 100GB 1186 + - Enterprise: unlimited 1187 + 1188 + ### 4. Quota Purchasing 1189 + 1190 + Allow users to buy additional storage: 1191 + - Stripe integration 1192 + - $0.10/GB/month pricing 1193 + - Dynamic limit updates 1194 + 1195 + ### 5. Cross-Hold Deduplication 1196 + 1197 + If multiple holds share same S3 bucket: 1198 + - Track blob ownership globally 1199 + - Split costs proportionally 1200 + - More complex, but maximizes deduplication 1201 + 1202 + ### 6. Manifest-Based Quota (Alternative Model) 1203 + 1204 + Instead of tracking layers, track manifests: 1205 + - Simpler: just count manifest sizes 1206 + - No deduplication benefits for users 1207 + - Might be acceptable for some use cases 1208 + 1209 + ### 7. Redis-Based Quota (High Performance) 1210 + 1211 + For high-traffic registries: 1212 + - Use Redis instead of S3/SQLite 1213 + - Sub-millisecond quota checks 1214 + - Harbor-proven approach 1215 + 1216 + ### 8. Quota Visualizations 1217 + 1218 + Web UI showing: 1219 + - Storage usage over time 1220 + - Top consumers by repository 1221 + - Deduplication savings graph 1222 + - Layer size distribution 1223 + 1224 + ## Appendix: SQL Queries 1225 + 1226 + ### Check if User Still References Layer 1227 + 1228 + ```sql 1229 + -- After deleting manifest, check if user has other manifests using this layer 1230 + SELECT COUNT(*) 1231 + FROM layers l 1232 + JOIN manifests m ON l.manifest_id = m.id 1233 + WHERE m.did = ? -- User's DID 1234 + AND l.digest = ? -- Layer digest to check 1235 + AND m.id != ? -- Exclude the manifest being deleted 1236 + ``` 1237 + 1238 + ### Get All Unique Layers for User 1239 + 1240 + ```sql 1241 + -- Calculate true quota usage for a user 1242 + SELECT DISTINCT l.digest, l.size 1243 + FROM layers l 1244 + JOIN manifests m ON l.manifest_id = m.id 1245 + WHERE m.did = ? 1246 + AND m.hold_endpoint = ? 1247 + ``` 1248 + 1249 + ### Get Referenced Blobs for Hold 1250 + 1251 + ```sql 1252 + -- For GC: get all blobs still referenced by any user of this hold 1253 + SELECT DISTINCT l.digest 1254 + FROM layers l 1255 + JOIN manifests m ON l.manifest_id = m.id 1256 + WHERE m.hold_endpoint = ? 
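-- Any blob in S3 whose digest is absent from this result set is
-- unreferenced for this hold and becomes a GC deletion candidate.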
1257 + ``` 1258 + 1259 + ### Get Storage Stats by Repository 1260 + 1261 + ```sql 1262 + -- User's storage broken down by repository 1263 + SELECT 1264 + m.repository, 1265 + COUNT(DISTINCT m.id) as manifest_count, 1266 + COUNT(DISTINCT l.digest) as unique_layers, 1267 + SUM(l.size) as total_size 1268 + FROM manifests m 1269 + JOIN layers l ON l.manifest_id = m.id 1270 + WHERE m.did = ? 1271 + AND m.hold_endpoint = ? 1272 + GROUP BY m.repository 1273 + ORDER BY total_size DESC 1274 + ``` 1275 + 1276 + ## References 1277 + 1278 + - **Harbor Quotas:** https://goharbor.io/docs/1.10/administration/configure-project-quotas/ 1279 + - **Harbor Source:** https://github.com/goharbor/harbor 1280 + - **ATProto Spec:** https://atproto.com/specs/record 1281 + - **OCI Distribution Spec:** https://github.com/opencontainers/distribution-spec 1282 + - **S3 API Reference:** https://docs.aws.amazon.com/AmazonS3/latest/API/ 1283 + - **Distribution GC:** https://github.com/distribution/distribution/blob/main/registry/storage/garbagecollect.go 1284 + 1285 + --- 1286 + 1287 + **Document Version:** 1.0 1288 + **Last Updated:** 2025-10-09 1289 + **Author:** Generated from implementation research and Harbor analysis
-460
docs/SPEC.md
··· 1 - ATProto Container Registry (atcr.io) Implementation Plan 2 - 3 - Project Structure 4 - 5 - /home/data/atcr.io/ 6 - ├── cmd/ 7 - │ └── registry/ 8 - │ └── main.go # Entrypoint that imports distribution 9 - ├── pkg/ 10 - │ ├── atproto/ 11 - │ │ ├── client.go # ATProto client wrapper (using indigo) 12 - │ │ ├── manifest_store.go # Implements distribution.ManifestService 13 - │ │ ├── resolver.go # DID/handle resolution (alice → did:plc:...) 14 - │ │ └── lexicon.go # ATProto record schemas for manifests 15 - │ ├── storage/ 16 - │ │ ├── s3_blob_store.go # Wraps distribution's S3 driver for blobs 17 - │ │ └── routing_repository.go # Routes manifests→ATProto, blobs→S3 18 - │ ├── middleware/ 19 - │ │ ├── repository.go # Repository middleware registration 20 - │ │ └── registry.go # Registry middleware for name resolution 21 - │ └── server/ 22 - │ └── handler.go # HTTP wrapper for custom name resolution 23 - ├── config/ 24 - │ └── config.yml # Registry configuration 25 - ├── go.mod 26 - ├── go.sum 27 - ├── Dockerfile 28 - ├── README.md 29 - └── CLAUDE.md # Updated with architecture docs 30 - 31 - 32 - Implementation Steps 33 - 34 - Phase 1: Project Setup 35 - 36 - 1. Initialize Go module with github.com/distribution/distribution/v3 and github.com/bluesky-social/indigo 37 - 2. Create basic project structure 38 - 3. Set up cmd/appview/main.go that imports distribution and registers middleware 39 - 40 - Phase 2: Core ATProto Integration 41 - 42 - 4. Implement DID/handle resolver (pkg/atproto/resolver.go) 43 - - Resolve handles to DIDs (alice.bsky.social → did:plc:xyz) 44 - - Discover PDS endpoints from DID documents 45 - 5. Create ATProto client wrapper (pkg/atproto/client.go) 46 - - Wrap indigo SDK for manifest storage 47 - - Handle authentication with PDS 48 - 6. Design ATProto lexicon for manifest records (pkg/atproto/lexicon.go) 49 - - Define schema for storing OCI manifests as ATProto records 50 - 51 - Phase 3: Storage Layer 52 - 53 - 7. Implement ATProto manifest store (pkg/atproto/manifest_store.go) 54 - - Implements distribution.ManifestService 55 - - Stores/retrieves manifests from PDS 56 - 8. Implement S3 blob store wrapper (pkg/storage/s3_blob_store.go) 57 - - Wraps distribution's built-in S3 driver 58 - 9. Create routing repository (pkg/storage/routing_repository.go) 59 - - Returns ATProto store for Manifests() 60 - - Returns S3 store for Blobs() 61 - 62 - Phase 4: Middleware Layer 63 - 64 - 10. Implement repository middleware (pkg/middleware/repository.go) 65 - - Registers routing repository 66 - - Configurable via YAML 67 - 11. Implement registry/namespace middleware (pkg/middleware/registry.go) 68 - - Intercepts Repository(name) calls 69 - - Performs name resolution before repository creation 70 - 71 - Phase 5: HTTP Layer (if needed) 72 - 73 - 12. Create custom HTTP handler (pkg/server/handler.go) 74 - - Wraps distribution's HTTP handlers 75 - - Performs early name resolution: atcr.io/alice/myimage → resolve alice 76 - - Delegates to distribution handlers 77 - 78 - Phase 6: Configuration & Deployment 79 - 80 - 13. Create registry configuration (config/config.yml) 81 - 14. Create Dockerfile for building atcr-appview binary 82 - 16. Write README.md with usage instructions 83 - 84 - Phase 7: Documentation 85 - 86 - 17. 
Update CLAUDE.md with: 87 - - Architecture overview (ATProto for manifests, S3 for blobs) 88 - - Build/run/test commands 89 - - How name resolution works 90 - - Middleware registration flow 91 - - Key design decisions 92 - 93 - Key Technical Decisions 94 - 95 - ATProto Storage Design: 96 - - Manifests stored as ATProto records in user's PDS 97 - - Each image manifest is a record in a io.atcr.manifest collection 98 - - Record key = image digest (sha256:...) 99 - 100 - Name Resolution: 101 - - atcr.io/alice/myimage → resolve alice to DID → discover PDS 102 - - Support both handles (alice.bsky.social) and DIDs (did:plc:xyz) 103 - 104 - Blob Storage: 105 - - All layers/blobs in S3 (content-addressable by digest) 106 - - Manifests reference S3-stored blobs by digest 107 - - S3 provides cheap, durable blob storage 108 - 109 - Middleware Strategy: 110 - - Repository middleware for storage routing 111 - - Registry middleware (or HTTP wrapper) for name resolution 112 - - No fork of distribution core needed 113 - 114 - Perfect. To match Docker Hub/ghcr.io/gcr.io, here's what we need: 115 - 116 - ● Implementation Plan (Drop-in replacement for Docker Hub/ghcr.io) 117 - 118 - Flow 1: Standard Token Auth (Like Docker Hub) - PRIMARY 119 - 120 - # User experience 121 - docker login atcr.io -u alice.bsky.social -p <atproto-app-password> 122 - docker push atcr.io/alice/myapp:latest 123 - 124 - # Behind the scenes 125 - 1. docker login stores credentials locally 126 - 2. docker push → Registry returns 401 with WWW-Authenticate: Bearer realm="https://atcr.io/auth/token"... 127 - 3. Docker auto-calls /auth/token with Basic auth (alice.bsky.social:app-password) 128 - 4. Auth service validates against ATProto createSession 129 - 5. Returns JWT token with scope for alice/myapp 130 - 6. Docker uses JWT for manifest/blob uploads 131 - 7. Registry validates JWT signature and scope 132 - 133 - Components: 134 - - /auth/token endpoint (standalone service or embedded) 135 - - ATProto session validator (username/password → validate via PDS) 136 - - JWT issuer/signer 137 - - JWT validator middleware for registry 138 - 139 - Flow 2: Credential Helper (Like gcr.io) - ADVANCED 140 - 141 - # User experience 142 - docker-credential-atcr configure 143 - # Opens browser for ATProto OAuth 144 - docker push atcr.io/alice/myapp:latest 145 - # No manual login needed 146 - 147 - # Behind the scenes 148 - 1. Helper does OAuth flow → gets ATProto access token 149 - 2. Caches token securely 150 - 3. When Docker needs credentials, calls helper via stdin/stdout 151 - 4. Helper exchanges ATProto token for registry JWT at /auth/exchange 152 - 5. Returns JWT to Docker 153 - 6. 
Docker uses JWT for requests 154 - 155 - Components: 156 - - cmd/credential-helper/main.go - Standalone binary 157 - - ATProto OAuth client 158 - - Token exchange endpoint (/auth/exchange) 159 - - Secure token cache 160 - 161 - Architecture: 162 - 163 - pkg/auth/ 164 - ├── token/ 165 - │ ├── service.go # HTTP handler for /auth/token 166 - │ ├── claims.go # JWT claims structure 167 - │ ├── issuer.go # Signs JWTs 168 - │ └── validator.go # Validates JWTs (middleware for registry) 169 - ├── atproto/ 170 - │ ├── session.go # Validates username/password via ATProto 171 - │ └── oauth.go # OAuth flow implementation 172 - ├── exchange/ 173 - │ └── handler.go # /auth/exchange endpoint (OAuth → JWT) 174 - └── scope.go # Parses/validates Docker scopes 175 - 176 - cmd/ 177 - ├── registry/main.go # Registry server (existing) 178 - ├── auth/main.go # Standalone auth service (optional) 179 - └── credential-helper/ 180 - └── main.go # docker-credential-atcr binary 181 - 182 - Config: 183 - 184 - auth: 185 - token: 186 - realm: https://atcr.io/auth/token # Where Docker gets tokens 187 - service: atcr.io 188 - issuer: atcr.io 189 - rootcertbundle: /etc/atcr/token-signing.crt 190 - privatekey: /etc/atcr/token-signing.pem 191 - expiration: 300 192 - 193 - atproto: 194 - # Used by auth service to validate credentials 195 - pds_endpoint: https://bsky.social 196 - client_id: atcr-appview 197 - oauth_redirect: http://localhost:8888/callback 198 - 199 - ATProto OAuth Implementation Plan 200 - 201 - Architecture 202 - 203 - Dependencies: 204 - - authelia.com/client/oauth2 - OAuth + PAR support 205 - - github.com/AxisCommunications/go-dpop - DPoP proof generation (handles JWK automatically) 206 - - github.com/golang-jwt/jwt/v5 - JWT library (transitive via go-dpop) 207 - - Our existing pkg/atproto/resolver.go - ATProto identity resolution 208 - 209 - Implementation Components 210 - 211 - 1. OAuth Client (pkg/auth/oauth/client.go) - ~100 lines 212 - 213 - type Client struct { 214 - config *oauth2.Config 215 - dpopKey *ecdsa.PrivateKey 216 - resolver *atproto.Resolver 217 - clientID string // URL to our metadata document 218 - redirectURI string 219 - dpopNonce string // Server-provided nonce 220 - } 221 - 222 - func NewClient(clientID, redirectURI string) (*Client, error) 223 - func (c *Client) AuthorizeURL(handle string, scopes []string) (string, error) 224 - func (c *Client) Exchange(code string) (*Token, error) 225 - func (c *Client) addDPoPHeader(req *http.Request, method, url string) error 226 - 227 - Flow: 228 - 1. Generate ECDSA P-256 key for DPoP 229 - 2. Discover authorization server from handle/DID 230 - 3. Use authelia's PushedAuth() for PAR with DPoP header 231 - 4. Exchange code for token with DPoP proof 232 - 233 - 2. Authorization Server Discovery (pkg/auth/oauth/discovery.go) - ~30 lines 234 - 235 - type AuthServerMetadata struct { 236 - Issuer string `json:"issuer"` 237 - AuthorizationEndpoint string `json:"authorization_endpoint"` 238 - TokenEndpoint string `json:"token_endpoint"` 239 - PushedAuthorizationRequestEndpoint string `json:"pushed_authorization_request_endpoint"` 240 - DPoPSigningAlgValuesSupported []string `json:"dpop_signing_alg_values_supported"` 241 - } 242 - 243 - func DiscoverAuthServer(pdsEndpoint string) (*AuthServerMetadata, error) 244 - 245 - Implementation: 246 - - GET {pds}/.well-known/oauth-authorization-server 247 - - Parse JSON metadata 248 - - Validate required endpoints exist 249 - 250 - 3. 
Client Metadata Server (pkg/auth/oauth/metadata.go) - ~40 lines 251 - 252 - type ClientMetadata struct { 253 - ClientID string `json:"client_id"` 254 - RedirectURIs []string `json:"redirect_uris"` 255 - GrantTypes []string `json:"grant_types"` 256 - ResponseTypes []string `json:"response_types"` 257 - Scope string `json:"scope"` 258 - DPoPBoundAccessTokens bool `json:"dpop_bound_access_tokens"` 259 - } 260 - 261 - func ServeMetadata(clientID string, redirectURIs []string) http.Handler 262 - 263 - Serves: https://atcr.io/oauth/client-metadata.json 264 - 265 - 4. Token Storage (pkg/auth/oauth/storage.go) - ~50 lines 266 - 267 - type TokenStore struct { 268 - AccessToken string 269 - RefreshToken string 270 - DPoPKey *ecdsa.PrivateKey // Persist for refresh 271 - ExpiresAt time.Time 272 - } 273 - 274 - func (s *TokenStore) Save(path string) error 275 - func LoadTokenStore(path string) (*TokenStore, error) 276 - 277 - Storage location: ~/.atcr/oauth-tokens.json 278 - 279 - 5. Credential Helper (cmd/credential-helper/main.go) - ~80 lines 280 - 281 - // Docker credential helper protocol 282 - // Reads JSON from stdin, writes to stdout 283 - 284 - func main() { 285 - if len(os.Args) < 2 { 286 - os.Exit(1) 287 - } 288 - 289 - switch os.Args[1] { 290 - case "get": 291 - handleGet() // Return credentials for registry 292 - case "store": 293 - handleStore() // Store credentials 294 - case "erase": 295 - handleErase() // Remove credentials 296 - } 297 - } 298 - 299 - func handleGet() { 300 - var request struct { 301 - ServerURL string `json:"ServerURL"` 302 - } 303 - json.NewDecoder(os.Stdin).Decode(&request) 304 - 305 - // Load token from storage 306 - // Exchange for registry JWT if needed 307 - // Output: {"Username": "oauth2", "Secret": "<jwt>"} 308 - } 309 - 310 - 6. OAuth Flow (cmd/credential-helper/oauth.go) - ~60 lines 311 - 312 - func RunOAuthFlow(handle string) (*TokenStore, error) { 313 - // 1. Start local HTTP server on :8888 314 - // 2. Open browser to authorization URL 315 - // 3. Wait for callback with code 316 - // 4. Exchange code for token 317 - // 5. Save token store 318 - // 6. Return token 319 - } 320 - 321 - func startCallbackServer() (chan string, *http.Server) 322 - 323 - Complete Flow Example 324 - 325 - User runs: 326 - docker-credential-atcr configure 327 - 328 - What happens: 329 - 330 - 1. Generate DPoP key (client.go) 331 - dpopKey, _ := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) 332 - 333 - 2. Resolve handle → DID → PDS (using our resolver) 334 - did, pds, _ := resolver.ResolveIdentity(ctx, "alice.bsky.social") 335 - 336 - 3. Discover auth server (discovery.go) 337 - metadata, _ := DiscoverAuthServer(pds) 338 - // Returns: PAR endpoint, token endpoint, etc. 339 - 340 - 4. 
Create PAR request with DPoP (client.go + go-dpop) 341 - // Generate DPoP proof for PAR endpoint 342 - claims := &dpop.ProofTokenClaims{ 343 - Method: dpop.POST, 344 - URL: metadata.PushedAuthorizationRequestEndpoint, 345 - RegisteredClaims: &jwt.RegisteredClaims{ 346 - IssuedAt: jwt.NewNumericDate(time.Now()), 347 - }, 348 - } 349 - dpopProof, _ := dpop.Create(jwt.SigningMethodES256, claims, dpopKey) 350 - 351 - // Use authelia for PAR 352 - config := &oauth2.Config{ 353 - ClientID: "https://atcr.io/oauth/client-metadata.json", 354 - Endpoint: oauth2.Endpoint{ 355 - AuthURL: metadata.AuthorizationEndpoint, 356 - TokenURL: metadata.TokenEndpoint, 357 - }, 358 - } 359 - 360 - // Create custom HTTP client that adds DPoP header 361 - client := &http.Client{ 362 - Transport: &dpopTransport{ 363 - base: http.DefaultTransport, 364 - dpopKey: dpopKey, 365 - }, 366 - } 367 - ctx := context.WithValue(context.Background(), oauth2.HTTPClient, client) 368 - 369 - // PAR request (authelia handles this) 370 - authURL, parResp, _ := config.PushedAuth(ctx, state, 371 - oauth2.SetAuthURLParam("code_challenge", pkceChallenge), 372 - oauth2.SetAuthURLParam("code_challenge_method", "S256"), 373 - ) 374 - 375 - 5. Open browser, get code (oauth.go) 376 - exec.Command("open", authURL).Run() 377 - // User authorizes 378 - // Callback: http://localhost:8888?code=xyz&state=abc 379 - 380 - 6. Exchange code for token with DPoP (client.go + go-dpop) 381 - // Generate DPoP proof for token endpoint 382 - claims := &dpop.ProofTokenClaims{ 383 - Method: dpop.POST, 384 - URL: metadata.TokenEndpoint, 385 - RegisteredClaims: &jwt.RegisteredClaims{ 386 - IssuedAt: jwt.NewNumericDate(time.Now()), 387 - }, 388 - } 389 - dpopProof, _ := dpop.Create(jwt.SigningMethodES256, claims, dpopKey) 390 - 391 - // Exchange (with DPoP header added by our transport) 392 - token, _ := config.Exchange(ctx, code, 393 - oauth2.SetAuthURLParam("code_verifier", pkceVerifier), 394 - ) 395 - 396 - 7. Save token + DPoP key (storage.go) 397 - store := &TokenStore{ 398 - AccessToken: token.AccessToken, 399 - RefreshToken: token.RefreshToken, 400 - DPoPKey: dpopKey, 401 - ExpiresAt: token.Expiry, 402 - } 403 - store.Save("~/.atcr/oauth-tokens.json") 404 - 405 - Later, when docker push happens: 406 - docker push atcr.io/alice/myapp:latest 407 - 408 - 1. Docker calls credential helper: docker-credential-atcr get 409 - 2. Helper loads stored token 410 - 3. Helper calls /auth/exchange with OAuth token → gets registry JWT 411 - 4. Returns JWT to Docker 412 - 5. 
Docker uses JWT for push 413 - 414 - Directory Structure 415 - 416 - pkg/auth/oauth/ 417 - ├── client.go # OAuth client with DPoP integration 418 - ├── discovery.go # Authorization server discovery 419 - ├── metadata.go # Client metadata server 420 - ├── storage.go # Token persistence 421 - └── transport.go # HTTP transport that adds DPoP headers 422 - 423 - cmd/credential-helper/ 424 - ├── main.go # Docker credential helper protocol 425 - ├── oauth.go # OAuth flow (browser, callback) 426 - └── config.go # Configuration 427 - 428 - go.mod additions: 429 - authelia.com/client/oauth2 v0.25.0 430 - github.com/AxisCommunications/go-dpop v1.1.2 431 - 432 - Unified Model 433 - 434 - Every hold service requires HOLD_OWNER: 435 - - Owner's PDS has the io.atcr.hold record 436 - - Owner's PDS has all io.atcr.hold.crew records 437 - - Authorization is always governed by PDS records 438 - 439 - For "public" hold (like Tangled's public knot): 440 - - Owner creates hold with public: true 441 - - Anyone can push/pull without being crew 442 - - Owner can add crew records for special privileges/tracking if desired 443 - 444 - Config has emergency override: 445 - auth: 446 - # Emergency freeze: ignore public setting, restrict to crew only 447 - # Use this to stop abuse without changing PDS records 448 - freeze: false 449 - 450 - Authorization logic: 451 - 1. Check freeze in config → if true, skip to crew check 452 - 2. Query owner's PDS for io.atcr.hold record 453 - 3. If public: true → allow all operations (unless frozen) 454 - 4. If public: false OR frozen → query io.atcr.hold.crew records, check membership 455 - 456 - Remove from config: 457 - - allow_all (replaced by public: true in PDS) 458 - - allowed_dids (replaced by crew records in PDS) 459 - 460 - This way the hold owner at atcr.io can run a public hold at hold1.atcr.io that anyone can use, but can freeze it instantly if needed without touching PDS records.
-334
docs/TESTING.md
··· 1 - # Local Testing Guide 2 - 3 - ## Quick Start 4 - 5 - ```bash 6 - ./test-local.sh 7 - ``` 8 - 9 - This automated script will: 10 - 1. Create storage directories 11 - 2. Build all binaries 12 - 3. Start both services 13 - 4. Show test commands 14 - 15 - ## Manual Testing Steps 16 - 17 - ### 1. Setup Directories 18 - 19 - ```bash 20 - sudo mkdir -p /var/lib/atcr/{blobs,hold,auth} 21 - sudo chown -R $USER:$USER /var/lib/atcr 22 - ``` 23 - 24 - ### 2. Build Binaries 25 - 26 - ```bash 27 - go build -o atcr-appview ./cmd/appview 28 - go build -o atcr-hold ./cmd/hold 29 - go build -o docker-credential-atcr ./cmd/credential-helper 30 - ``` 31 - 32 - ### 3. Configure Environment 33 - 34 - Create a `.env` file in the project root: 35 - 36 - ```bash 37 - cp .env.example .env 38 - ``` 39 - 40 - Edit `.env` with your credentials: 41 - 42 - ```env 43 - # Your ATProto handle 44 - ATPROTO_HANDLE=your-handle.bsky.social 45 - 46 - # Hold service public URL (hostname becomes the hold name) 47 - HOLD_PUBLIC_URL=http://127.0.0.1:8080 48 - 49 - # Enable OAuth registration on startup 50 - HOLD_AUTO_REGISTER=true 51 - ``` 52 - 53 - **Notes:** 54 - - Use your Bluesky handle (e.g., `alice.bsky.social`) 55 - - For localhost, use `127.0.0.1` instead of `localhost` for OAuth 56 - - The hostname from the URL becomes the hold name (e.g., `127.0.0.1` or `hold1.atcr.io`) 57 - 58 - **Load environment:** 59 - ```bash 60 - export $(cat .env | xargs) 61 - ``` 62 - 63 - ### 4. Start Services 64 - 65 - **Terminal 1 - AppView:** 66 - ```bash 67 - ./atcr-appview serve config/config.yml 68 - ``` 69 - 70 - **Terminal 2 - Hold:** 71 - ```bash 72 - ./atcr-hold config/hold.yml 73 - ``` 74 - 75 - ### 5. Start Services and OAuth Registration 76 - 77 - **Terminal 1 - AppView:** 78 - ```bash 79 - ./atcr-appview serve config/config.yml 80 - ``` 81 - 82 - **Terminal 2 - Hold (OAuth registration):** 83 - ```bash 84 - ./atcr-hold config/hold.yml 85 - ``` 86 - 87 - The hold service will start an OAuth flow. You'll see output like: 88 - 89 - ``` 90 - ================================================================================ 91 - OAUTH AUTHORIZATION REQUIRED 92 - ================================================================================ 93 - 94 - Please visit this URL to authorize the hold service: 95 - 96 - https://bsky.social/oauth/authorize?... 97 - 98 - Waiting for authorization... 99 - ================================================================================ 100 - ``` 101 - 102 - **Steps:** 103 - 1. Copy the OAuth URL from the logs 104 - 2. Open it in your browser 105 - 3. Sign in to Bluesky and authorize 106 - 4. The callback will complete automatically 107 - 5. Hold service registers in your PDS 108 - 109 - After successful OAuth, you'll see: 110 - ``` 111 - ✓ Created hold record: at://did:plc:.../io.atcr.hold/127.0.0.1 112 - ✓ Created crew record: at://did:plc:.../io.atcr.hold.crew/127.0.0.1-did:plc:... 113 - ================================================================================ 114 - REGISTRATION COMPLETE 115 - ================================================================================ 116 - Hold service is now registered and ready to use! 117 - ``` 118 - 119 - This creates two records in your PDS: 120 - - `io.atcr.hold` - Defines the storage endpoint URL 121 - - `io.atcr.hold.crew` - Grants you admin access 122 - 123 - ### 6. 
Test Docker Push/Pull 124 - 125 - **Test 1: Basic Push** 126 - ```bash 127 - # Tag an image 128 - docker tag alpine:latest localhost:5000/alice/alpine:test 129 - 130 - # Push to local registry 131 - docker push localhost:5000/alice/alpine:test 132 - ``` 133 - 134 - **Test 2: Pull** 135 - ```bash 136 - # Remove local image 137 - docker rmi localhost:5000/alice/alpine:test 138 - 139 - # Pull from registry 140 - docker pull localhost:5000/alice/alpine:test 141 - ``` 142 - 143 - **Test 3: Verify Storage** 144 - ```bash 145 - # Check manifests were stored in ATProto 146 - # (Check your PDS for io.atcr.manifest records) 147 - 148 - # Check blobs were stored locally 149 - ls -lh /var/lib/atcr/blobs/docker/registry/v2/ 150 - ``` 151 - 152 - ## OAuth Testing (Optional) 153 - 154 - ### Setup Credential Helper 155 - 156 - ```bash 157 - # Configure OAuth 158 - ./docker-credential-atcr configure 159 - 160 - # Follow the browser flow to authorize 161 - 162 - # Verify token was saved 163 - ls -la ~/.atcr/oauth-token.json 164 - ``` 165 - 166 - ### Configure Docker to Use Helper 167 - 168 - Edit `~/.docker/config.json`: 169 - ```json 170 - { 171 - "credHelpers": { 172 - "localhost:5000": "atcr" 173 - } 174 - } 175 - ``` 176 - 177 - ### Test with OAuth 178 - 179 - ```bash 180 - # Push should now use OAuth automatically 181 - docker push localhost:5000/alice/myapp:latest 182 - ``` 183 - 184 - ## Troubleshooting 185 - 186 - ### Registry won't start 187 - 188 - **Error:** `failed to create storage driver` 189 - ```bash 190 - # Check directory permissions 191 - ls -ld /var/lib/atcr/blobs 192 - # Should be owned by your user 193 - 194 - # Fix permissions 195 - sudo chown -R $USER:$USER /var/lib/atcr 196 - ``` 197 - 198 - **Error:** `address already in use` 199 - ```bash 200 - # Check what's using port 5000 201 - lsof -i :5000 202 - 203 - # Kill existing process 204 - kill $(lsof -t -i :5000) 205 - ``` 206 - 207 - ### Hold service won't start 208 - 209 - **Error:** `failed to create storage driver` 210 - ```bash 211 - # Check hold directory 212 - ls -ld /var/lib/atcr/hold 213 - sudo chown -R $USER:$USER /var/lib/atcr/hold 214 - ``` 215 - 216 - **Error:** `address already in use` 217 - ```bash 218 - # Check port 8080 219 - lsof -i :8080 220 - kill $(lsof -t -i :8080) 221 - ``` 222 - 223 - ### Docker push fails 224 - 225 - **Error:** `unauthorized: authentication required` 226 - - Check `ATPROTO_DID` and `ATPROTO_ACCESS_TOKEN` are set 227 - - Verify token is valid (not expired) 228 - - Check registry logs for auth errors 229 - 230 - **Error:** `denied: requested access to the resource is denied` 231 - - Check the identity in the image name matches your DID 232 - - Example: If your handle is `alice.bsky.social`, use: 233 - ```bash 234 - docker push localhost:5000/alice/myapp:test 235 - # NOT localhost:5000/bob/myapp:test 236 - ``` 237 - 238 - **Error:** `failed to resolve identity` 239 - - Check internet connection (needs to resolve DIDs) 240 - - Verify handle is correct 241 - - Try using DID directly instead of handle 242 - 243 - ### OAuth issues 244 - 245 - **Error:** `Failed to exchange token` 246 - - Ensure registry is running and accessible 247 - - Check `/auth/exchange` endpoint is responding 248 - - Verify OAuth token hasn't expired 249 - 250 - **Error:** `Token validation failed` 251 - - Token might be expired 252 - - Run `./docker-credential-atcr configure` again 253 - - Check PDS is accessible 254 - 255 - ## Verifying the Flow 256 - 257 - ### Check Registry is Running 258 - ```bash 259 - curl 
http://localhost:5000/v2/ 260 - # Should return: {} 261 - ``` 262 - 263 - ### Check Hold is Running 264 - ```bash 265 - curl http://localhost:8080/health 266 - # Should return: {"status":"ok"} 267 - ``` 268 - 269 - ### Check Auth Endpoint 270 - ```bash 271 - curl -v http://localhost:5000/v2/ 272 - # Should return 401 with WWW-Authenticate header 273 - ``` 274 - 275 - ### Inspect Stored Data 276 - 277 - **Manifests (in ATProto):** 278 - - Check your PDS web interface 279 - - Look for `io.atcr.manifest` collection records 280 - 281 - **Blobs (local filesystem):** 282 - ```bash 283 - # List blobs 284 - find /var/lib/atcr/blobs -type f 285 - 286 - # Check blob content (should be binary) 287 - ls -lh /var/lib/atcr/blobs/docker/registry/v2/blobs/sha256/ 288 - ``` 289 - 290 - ## Clean Up 291 - 292 - ### Stop Services 293 - ```bash 294 - # If using test script 295 - kill $(cat .atcr-pids) 296 - 297 - # Or manually 298 - pkill atcr-appview 299 - pkill atcr-hold 300 - ``` 301 - 302 - ### Remove Test Data 303 - ```bash 304 - # Remove all stored data 305 - sudo rm -rf /var/lib/atcr/* 306 - 307 - # Remove OAuth tokens 308 - rm -rf ~/.atcr/ 309 - ``` 310 - 311 - ### Reset Docker Config 312 - ```bash 313 - # Remove credential helper config 314 - # Edit ~/.docker/config.json and remove "credHelpers" section 315 - ``` 316 - 317 - ## Next Steps 318 - 319 - Once local testing works: 320 - 321 - 1. **Deploy to production:** 322 - - Use S3/Storj for blob storage 323 - - Deploy registry and hold to separate hosts 324 - - Configure DNS for `atcr.io` 325 - 326 - 2. **Enable BYOS:** 327 - - Users create `io.atcr.hold` records 328 - - Deploy their own hold service 329 - - AppView automatically routes to their storage 330 - 331 - 3. **Add monitoring:** 332 - - Registry metrics 333 - - Hold service metrics 334 - - Storage usage tracking