···4343 return
4444 }
45454646- // Get OAuth session for the authenticated user
4747- slog.Debug("Getting OAuth session for star", "user_did", user.DID)
4848- session, err := h.Refresher.GetSession(r.Context(), user.DID)
4949- if err != nil {
5050- slog.Warn("Failed to get OAuth session for star", "user_did", user.DID, "error", err)
5151- http.Error(w, fmt.Sprintf("Failed to get OAuth session: %v", err), http.StatusUnauthorized)
5252- return
5353- }
5454-5555- // Get user's PDS client (use indigo's API client which handles DPoP automatically)
5656- apiClient := session.APIClient()
5757- pdsClient := atproto.NewClientWithIndigoClient(user.PDSEndpoint, user.DID, apiClient)
4646+ // Create ATProto client with session provider (uses DoWithSession for DPoP nonce safety)
4747+ slog.Debug("Creating PDS client for star", "user_did", user.DID)
4848+ pdsClient := atproto.NewClientWithSessionProvider(user.PDSEndpoint, user.DID, h.Refresher)
58495950 // Create star record
6051 starRecord := atproto.NewStarRecord(ownerDID, repository)
···10697 return
10798 }
10899109109- // Get OAuth session for the authenticated user
110110- slog.Debug("Getting OAuth session for unstar", "user_did", user.DID)
111111- session, err := h.Refresher.GetSession(r.Context(), user.DID)
112112- if err != nil {
113113- slog.Warn("Failed to get OAuth session for unstar", "user_did", user.DID, "error", err)
114114- http.Error(w, fmt.Sprintf("Failed to get OAuth session: %v", err), http.StatusUnauthorized)
115115- return
116116- }
117117-118118- // Get user's PDS client (use indigo's API client which handles DPoP automatically)
119119- apiClient := session.APIClient()
120120- pdsClient := atproto.NewClientWithIndigoClient(user.PDSEndpoint, user.DID, apiClient)
100100+ // Create ATProto client with session provider (uses DoWithSession for DPoP nonce safety)
101101+ slog.Debug("Creating PDS client for unstar", "user_did", user.DID)
102102+ pdsClient := atproto.NewClientWithSessionProvider(user.PDSEndpoint, user.DID, h.Refresher)
121103122104 // Delete star record from user's PDS
123105 rkey := atproto.StarRecordKey(ownerDID, repository)
···172154 return
173155 }
174156175175- // Get OAuth session for the authenticated user
176176- session, err := h.Refresher.GetSession(r.Context(), user.DID)
177177- if err != nil {
178178- slog.Debug("Failed to get OAuth session for check star", "user_did", user.DID, "error", err)
179179- // No OAuth session - return not starred
180180- w.Header().Set("Content-Type", "application/json")
181181- json.NewEncoder(w).Encode(map[string]bool{"starred": false})
182182- return
183183- }
184184-185185- // Get user's PDS client (use indigo's API client which handles DPoP automatically)
186186- apiClient := session.APIClient()
187187- pdsClient := atproto.NewClientWithIndigoClient(user.PDSEndpoint, user.DID, apiClient)
157157+ // Create ATProto client with session provider (uses DoWithSession for DPoP nonce safety)
158158+ // Note: Error handling moves to the PDS call - if session doesn't exist, GetRecord will fail
159159+ pdsClient := atproto.NewClientWithSessionProvider(user.PDSEndpoint, user.DID, h.Refresher)
188160189161 // Check if star record exists
190162 rkey := atproto.StarRecordKey(ownerDID, repository)
+4-20
pkg/appview/handlers/images.go
···3030 repo := chi.URLParam(r, "repository")
3131 tag := chi.URLParam(r, "tag")
32323333- // Get OAuth session for the authenticated user
3434- session, err := h.Refresher.GetSession(r.Context(), user.DID)
3535- if err != nil {
3636- http.Error(w, fmt.Sprintf("Failed to get OAuth session: %v", err), http.StatusUnauthorized)
3737- return
3838- }
3939-4040- // Create ATProto client with OAuth credentials
4141- apiClient := session.APIClient()
4242- pdsClient := atproto.NewClientWithIndigoClient(user.PDSEndpoint, user.DID, apiClient)
3333+ // Create ATProto client with session provider (uses DoWithSession for DPoP nonce safety)
3434+ pdsClient := atproto.NewClientWithSessionProvider(user.PDSEndpoint, user.DID, h.Refresher)
43354436 // Compute rkey for tag record (repository_tag with slashes replaced)
4537 rkey := fmt.Sprintf("%s_%s", repo, tag)
···108100 return
109101 }
110102111111- // Get OAuth session for the authenticated user
112112- session, err := h.Refresher.GetSession(r.Context(), user.DID)
113113- if err != nil {
114114- http.Error(w, fmt.Sprintf("Failed to get OAuth session: %v", err), http.StatusUnauthorized)
115115- return
116116- }
117117-118118- // Create ATProto client with OAuth credentials
119119- apiClient := session.APIClient()
120120- pdsClient := atproto.NewClientWithIndigoClient(user.PDSEndpoint, user.DID, apiClient)
103103+ // Create ATProto client with session provider (uses DoWithSession for DPoP nonce safety)
104104+ pdsClient := atproto.NewClientWithSessionProvider(user.PDSEndpoint, user.DID, h.Refresher)
121105122106 // If tagged and confirmed, delete all tags first
123107 if tagged && confirmed {
+6-11
pkg/appview/handlers/repository.go
···163163 isStarred := false
164164 user := middleware.GetUser(r)
165165 if user != nil && h.Refresher != nil && h.Directory != nil {
166166- // Get OAuth session for the authenticated user
167167- session, err := h.Refresher.GetSession(r.Context(), user.DID)
168168- if err == nil {
169169- // Get user's PDS client
170170- apiClient := session.APIClient()
171171- pdsClient := atproto.NewClientWithIndigoClient(user.PDSEndpoint, user.DID, apiClient)
166166+ // Create ATProto client with session provider (uses DoWithSession for DPoP nonce safety)
167167+ pdsClient := atproto.NewClientWithSessionProvider(user.PDSEndpoint, user.DID, h.Refresher)
172168173173- // Check if star record exists
174174- rkey := atproto.StarRecordKey(owner.DID, repository)
175175- _, err = pdsClient.GetRecord(r.Context(), atproto.StarCollection, rkey)
176176- isStarred = (err == nil)
177177- }
169169+ // Check if star record exists
170170+ rkey := atproto.StarRecordKey(owner.DID, repository)
171171+ _, err := pdsClient.GetRecord(r.Context(), atproto.StarCollection, rkey)
172172+ isStarred = (err == nil)
178173 }
179174180175 // Check if current user is the repository owner
+4-28
pkg/appview/handlers/settings.go
···2626 return
2727 }
28282929- // Get OAuth session for the user
3030- session, err := h.Refresher.GetSession(r.Context(), user.DID)
3131- if err != nil {
3232- // OAuth session not found or expired - redirect to re-authenticate
3333- slog.Warn("OAuth session not found, redirecting to login", "component", "settings", "did", user.DID, "error", err)
3434- http.Redirect(w, r, "/auth/oauth/login?return_to=/settings", http.StatusFound)
3535- return
3636- }
3737-3838- // Use indigo's API client directly - it handles all auth automatically
3939- apiClient := session.APIClient()
4040-4141- // Create ATProto client with indigo's XRPC client
4242- client := atproto.NewClientWithIndigoClient(user.PDSEndpoint, user.DID, apiClient)
2929+ // Create ATProto client with session provider (uses DoWithSession for DPoP nonce safety)
3030+ client := atproto.NewClientWithSessionProvider(user.PDSEndpoint, user.DID, h.Refresher)
43314432 // Fetch sailor profile
4533 profile, err := storage.GetProfile(r.Context(), client)
···96849785 holdEndpoint := r.FormValue("hold_endpoint")
98869999- // Get OAuth session for the user
100100- session, err := h.Refresher.GetSession(r.Context(), user.DID)
101101- if err != nil {
102102- // OAuth session not found or expired - redirect to re-authenticate
103103- slog.Warn("OAuth session not found, redirecting to login", "component", "settings", "did", user.DID, "error", err)
104104- http.Redirect(w, r, "/auth/oauth/login?return_to=/settings", http.StatusFound)
105105- return
106106- }
107107-108108- // Use indigo's API client directly - it handles all auth automatically
109109- apiClient := session.APIClient()
110110-111111- // Create ATProto client with indigo's XRPC client
112112- client := atproto.NewClientWithIndigoClient(user.PDSEndpoint, user.DID, apiClient)
8787+ // Create ATProto client with session provider (uses DoWithSession for DPoP nonce safety)
8888+ client := atproto.NewClientWithSessionProvider(user.PDSEndpoint, user.DID, h.Refresher)
1138911490 // Fetch existing profile or create new one
11591 profile, err := storage.GetProfile(r.Context(), client)
+3-9
pkg/appview/middleware/registry.go
···409409 var atprotoClient *atproto.Client
410410411411 if nr.refresher != nil {
412412- // Try OAuth flow first
413413- session, err := nr.refresher.GetSession(ctx, did)
414414- if err == nil {
415415- // OAuth session available - use indigo's API client (handles DPoP automatically)
416416- apiClient := session.APIClient()
417417- atprotoClient = atproto.NewClientWithIndigoClient(pdsEndpoint, did, apiClient)
418418- } else {
419419- slog.Debug("OAuth refresh failed, falling back to Basic Auth", "component", "registry/middleware", "did", did, "error", err)
420420- }
412412+ // Use session provider for locked OAuth sessions
413413+ // This prevents DPoP nonce race conditions during concurrent layer uploads
414414+ atprotoClient = atproto.NewClientWithSessionProvider(pdsEndpoint, did, nr.refresher)
421415 }
422416423417 // Fall back to Basic Auth token cache if OAuth not available
+71-57
pkg/atproto/client.go
···1212 "strings"
13131414 "github.com/bluesky-social/indigo/atproto/atclient"
1515+ indigo_oauth "github.com/bluesky-social/indigo/atproto/auth/oauth"
1516)
16171718// Sentinel errors
···1920 ErrRecordNotFound = errors.New("record not found")
2021)
21222323+// SessionProvider provides locked OAuth sessions for PDS operations.
2424+// This interface allows the ATProto client to use DoWithSession() for each PDS call,
2525+// preventing DPoP nonce race conditions during concurrent operations.
2626+type SessionProvider interface {
2727+ // DoWithSession executes fn with a locked OAuth session.
2828+ // The lock is held for the entire duration, serializing DPoP nonce updates.
2929+ DoWithSession(ctx context.Context, did string, fn func(session *indigo_oauth.ClientSession) error) error
3030+}
3131+2232// Client wraps ATProto operations for the registry
2333type Client struct {
2434 pdsEndpoint string
2535 did string
2636 accessToken string // For Basic Auth only
2737 httpClient *http.Client
2828- useIndigoClient bool // true if using indigo's OAuth client (handles auth automatically)
2929- indigoClient *atclient.APIClient // indigo's API client for OAuth requests
3838+ sessionProvider SessionProvider // For locked OAuth sessions (prevents DPoP nonce races)
3039}
31403241// NewClient creates a new ATProto client for Basic Auth tokens (app passwords)
···3948 }
4049}
41504242-// NewClientWithIndigoClient creates an ATProto client using indigo's API client
4343-// This uses indigo's native XRPC methods with automatic DPoP handling
4444-func NewClientWithIndigoClient(pdsEndpoint, did string, indigoClient *atclient.APIClient) *Client {
5151+// NewClientWithSessionProvider creates an ATProto client that uses locked OAuth sessions.
5252+// This is the preferred constructor for concurrent operations (e.g., Docker layer uploads)
5353+// as it prevents DPoP nonce race conditions by serializing PDS calls per-DID.
5454+//
5555+// Each PDS call acquires a per-DID lock, ensuring that:
5656+// - Only one goroutine at a time can negotiate DPoP nonces with the PDS
5757+// - The session's nonce is saved to DB before other goroutines load it
5858+// - Concurrent manifest operations don't cause nonce thrashing
5959+func NewClientWithSessionProvider(pdsEndpoint, did string, sessionProvider SessionProvider) *Client {
4560 return &Client{
4661 pdsEndpoint: pdsEndpoint,
4762 did: did,
4848- useIndigoClient: true,
4949- indigoClient: indigoClient,
5050- httpClient: indigoClient.Client, // Keep for any fallback cases
6363+ sessionProvider: sessionProvider,
6464+ httpClient: &http.Client{},
5165 }
5266}
5367···6781 "record": record,
6882 }
69837070- // Use indigo API client (OAuth with DPoP)
7171- if c.useIndigoClient && c.indigoClient != nil {
8484+ // Use session provider (locked OAuth with DPoP) - prevents nonce races
8585+ if c.sessionProvider != nil {
7286 var result Record
7373- err := c.indigoClient.Post(ctx, "com.atproto.repo.putRecord", payload, &result)
8787+ err := c.sessionProvider.DoWithSession(ctx, c.did, func(session *indigo_oauth.ClientSession) error {
8888+ apiClient := session.APIClient()
8989+ return apiClient.Post(ctx, "com.atproto.repo.putRecord", payload, &result)
9090+ })
7491 if err != nil {
7592 return nil, fmt.Errorf("putRecord failed: %w", err)
7693 }
···113130114131// GetRecord retrieves a record from the ATProto repository
115132func (c *Client) GetRecord(ctx context.Context, collection, rkey string) (*Record, error) {
116116- // Use indigo API client (OAuth with DPoP)
117117- if c.useIndigoClient && c.indigoClient != nil {
118118- params := map[string]any{
119119- "repo": c.did,
120120- "collection": collection,
121121- "rkey": rkey,
122122- }
133133+ params := map[string]any{
134134+ "repo": c.did,
135135+ "collection": collection,
136136+ "rkey": rkey,
137137+ }
123138139139+ // Use session provider (locked OAuth with DPoP) - prevents nonce races
140140+ if c.sessionProvider != nil {
124141 var result Record
125125- err := c.indigoClient.Get(ctx, "com.atproto.repo.getRecord", params, &result)
142142+ err := c.sessionProvider.DoWithSession(ctx, c.did, func(session *indigo_oauth.ClientSession) error {
143143+ apiClient := session.APIClient()
144144+ return apiClient.Get(ctx, "com.atproto.repo.getRecord", params, &result)
145145+ })
126146 if err != nil {
127147 // Check for RecordNotFound error from indigo's APIError type
128148 var apiErr *atclient.APIError
···187207 "rkey": rkey,
188208 }
189209190190- // Use indigo API client (OAuth with DPoP)
191191- if c.useIndigoClient && c.indigoClient != nil {
192192- var result map[string]any // deleteRecord returns empty object on success
193193- err := c.indigoClient.Post(ctx, "com.atproto.repo.deleteRecord", payload, &result)
210210+ // Use session provider (locked OAuth with DPoP) - prevents nonce races
211211+ if c.sessionProvider != nil {
212212+ err := c.sessionProvider.DoWithSession(ctx, c.did, func(session *indigo_oauth.ClientSession) error {
213213+ apiClient := session.APIClient()
214214+ var result map[string]any // deleteRecord returns empty object on success
215215+ return apiClient.Post(ctx, "com.atproto.repo.deleteRecord", payload, &result)
216216+ })
194217 if err != nil {
195218 return fmt.Errorf("deleteRecord failed: %w", err)
196219 }
···279302280303// UploadBlob uploads binary data to the PDS and returns a blob reference
281304func (c *Client) UploadBlob(ctx context.Context, data []byte, mimeType string) (*ATProtoBlobRef, error) {
282282- // Use indigo API client (OAuth with DPoP)
283283- if c.useIndigoClient && c.indigoClient != nil {
305305+ // Use session provider (locked OAuth with DPoP) - prevents nonce races
306306+ if c.sessionProvider != nil {
284307 var result struct {
285308 Blob ATProtoBlobRef `json:"blob"`
286309 }
287310288288- err := c.indigoClient.LexDo(ctx,
289289- "POST",
290290- mimeType,
291291- "com.atproto.repo.uploadBlob",
292292- nil,
293293- data,
294294- &result,
295295- )
311311+ err := c.sessionProvider.DoWithSession(ctx, c.did, func(session *indigo_oauth.ClientSession) error {
312312+ apiClient := session.APIClient()
313313+ return apiClient.LexDo(ctx,
314314+ "POST",
315315+ mimeType,
316316+ "com.atproto.repo.uploadBlob",
317317+ nil,
318318+ data,
319319+ &result,
320320+ )
321321+ })
296322 if err != nil {
297323 return nil, fmt.Errorf("uploadBlob failed: %w", err)
298324 }
···510536// GetActorProfile fetches an actor's profile from their PDS
511537// The actor parameter can be a DID or handle
512538func (c *Client) GetActorProfile(ctx context.Context, actor string) (*ActorProfile, error) {
513513- // Use indigo API client (OAuth with DPoP)
514514- if c.useIndigoClient && c.indigoClient != nil {
515515- params := map[string]any{
516516- "actor": actor,
517517- }
518518-519519- var profile ActorProfile
520520- err := c.indigoClient.Get(ctx, "app.bsky.actor.getProfile", params, &profile)
521521- if err != nil {
522522- return nil, fmt.Errorf("getProfile failed: %w", err)
523523- }
524524- return &profile, nil
525525- }
526526-527527- // Basic Auth (app passwords)
539539+ // Basic Auth (app passwords) or unauthenticated
528540 url := fmt.Sprintf("%s/xrpc/app.bsky.actor.getProfile?actor=%s", c.pdsEndpoint, actor)
529541530542 req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
···563575// GetProfileRecord fetches the app.bsky.actor.profile record from PDS
564576// This returns the raw profile record with blob references (not CDN URLs)
565577func (c *Client) GetProfileRecord(ctx context.Context, did string) (*ProfileRecord, error) {
566566- // Use indigo API client (OAuth with DPoP)
567567- if c.useIndigoClient && c.indigoClient != nil {
568568- params := map[string]any{
569569- "repo": did,
570570- "collection": "app.bsky.actor.profile",
571571- "rkey": "self",
572572- }
578578+ params := map[string]any{
579579+ "repo": did,
580580+ "collection": "app.bsky.actor.profile",
581581+ "rkey": "self",
582582+ }
573583584584+ // Use session provider (locked OAuth with DPoP) - prevents nonce races
585585+ if c.sessionProvider != nil {
574586 var result struct {
575587 Value ProfileRecord `json:"value"`
576588 }
577577-578578- err := c.indigoClient.Get(ctx, "com.atproto.repo.getRecord", params, &result)
589589+ err := c.sessionProvider.DoWithSession(ctx, c.did, func(session *indigo_oauth.ClientSession) error {
590590+ apiClient := session.APIClient()
591591+ return apiClient.Get(ctx, "com.atproto.repo.getRecord", params, &result)
592592+ })
579593 if err != nil {
580594 return nil, fmt.Errorf("getRecord failed: %w", err)
581595 }
+2-17
pkg/atproto/client_test.go
···2323 if client.accessToken != "token123" {
2424 t.Errorf("accessToken = %v, want token123", client.accessToken)
2525 }
2626- if client.useIndigoClient {
2727- t.Error("useIndigoClient should be false for Basic Auth client")
2626+ if client.sessionProvider != nil {
2727+ t.Error("sessionProvider should be nil for Basic Auth client")
2828 }
2929}
3030···10011001 if client.PDSEndpoint() != expectedEndpoint {
10021002 t.Errorf("PDSEndpoint() = %v, want %v", client.PDSEndpoint(), expectedEndpoint)
10031003 }
10041004-}
10051005-10061006-// TestNewClientWithIndigoClient tests client initialization with Indigo client
10071007-func TestNewClientWithIndigoClient(t *testing.T) {
10081008- // Note: We can't easily create a real indigo client in tests without complex setup
10091009- // We pass nil for the indigo client, which is acceptable for testing the constructor
10101010- // The actual client.go code will handle nil indigo client by checking before use
10111011-10121012- // Skip this test for now as it requires a real indigo client
10131013- // The function is tested indirectly through integration tests
10141014- t.Skip("Skipping TestNewClientWithIndigoClient - requires real indigo client setup")
10151015-10161016- // When properly set up with a real indigo client, the test would look like:
10171017- // client := NewClientWithIndigoClient("https://pds.example.com", "did:plc:test123", indigoClient)
10181018- // if !client.useIndigoClient { t.Error("useIndigoClient should be true") }
10191004}
1020100510211006// TestListRecordsError tests error handling in ListRecords
+41-19
pkg/auth/oauth/client.go
···169169 r.uiSessionStore = store
170170}
171171172172-// GetSession gets a fresh OAuth session for a DID
173173-// Loads session from database on every request (database is source of truth)
174174-// Uses per-DID locking to prevent concurrent requests from racing on DPoP nonce updates
172172+// DoWithSession executes a function with a locked OAuth session.
173173+// The lock is held for the entire duration of the function, preventing DPoP nonce races.
174174+//
175175+// This is the preferred way to make PDS requests that require OAuth/DPoP authentication.
176176+// The lock is held through the entire PDS interaction, ensuring that:
177177+// 1. Only one goroutine at a time can negotiate DPoP nonces with the PDS for a given DID
178178+// 2. The session's PersistSessionCallback saves the updated nonce before other goroutines load
179179+// 3. Concurrent layer uploads don't race on stale nonces
175180//
176181// Why locking is critical:
177182// During docker push, multiple layers upload concurrently. Each layer creates a new
178183// ClientSession by loading from database. Without locking, this race condition occurs:
179179-// 1. Layer A loads session with stale DPoP nonce from DB
180180-// 2. Layer B loads session with same stale nonce (A hasn't updated DB yet)
181181-// 3. Layer A makes request → 401 "use_dpop_nonce" → gets fresh nonce → saves to DB
182182-// 4. Layer B makes request → 401 "use_dpop_nonce" (using stale nonce from step 2)
183183-// 5. DPoP nonce thrashing continues, eventually causing 500 errors
184184+// 1. Layer A loads session with stale DPoP nonce from DB
185185+// 2. Layer B loads session with same stale nonce (A hasn't updated DB yet)
186186+// 3. Layer A makes request → 401 "use_dpop_nonce" → gets fresh nonce → saves to DB
187187+// 4. Layer B makes request → 401 "use_dpop_nonce" (using stale nonce from step 2)
188188+// 5. DPoP nonce thrashing continues, eventually causing 500 errors
184189//
185190// With per-DID locking:
186186-// 1. Layer A acquires lock, loads session, handles nonce negotiation, saves, releases lock
187187-// 2. Layer B acquires lock AFTER A releases, loads fresh nonce from DB, succeeds
188188-func (r *Refresher) GetSession(ctx context.Context, did string) (*oauth.ClientSession, error) {
189189- // Get or create a mutex for this DID to prevent concurrent session loads
190190- // This prevents DPoP nonce race conditions when multiple layers upload simultaneously
191191+// 1. Layer A acquires lock, loads session, handles nonce negotiation, saves, releases lock
192192+// 2. Layer B acquires lock AFTER A releases, loads fresh nonce from DB, succeeds
193193+//
194194+// Example usage:
195195+//
196196+// var result MyResult
197197+// err := refresher.DoWithSession(ctx, did, func(session *oauth.ClientSession) error {
198198+// resp, err := session.DoWithAuth(session.Client, req, "com.atproto.server.getServiceAuth")
199199+// if err != nil {
200200+// return err
201201+// }
202202+// // Parse response into result...
203203+// return nil
204204+// })
205205+func (r *Refresher) DoWithSession(ctx context.Context, did string, fn func(session *oauth.ClientSession) error) error {
206206+ // Get or create a mutex for this DID
191207 mutexInterface, _ := r.didLocks.LoadOrStore(did, &sync.Mutex{})
192208 mutex := mutexInterface.(*sync.Mutex)
193209194194- // Serialize session loading per DID
210210+ // Hold the lock for the ENTIRE operation (load + PDS request + nonce save)
195211 mutex.Lock()
196212 defer mutex.Unlock()
197213198198- slog.Debug("Acquired session lock for DID",
214214+ slog.Debug("Acquired session lock for DoWithSession",
199215 "component", "oauth/refresher",
200216 "did", did)
201217218218+ // Load session while holding lock
202219 session, err := r.resumeSession(ctx, did)
203220 if err != nil {
204204- return nil, err
221221+ return err
205222 }
206223207207- slog.Debug("Released session lock for DID",
224224+ // Execute the function (PDS request) while still holding lock
225225+ // The session's PersistSessionCallback will save nonce updates to DB
226226+ err = fn(session)
227227+228228+ slog.Debug("Released session lock for DoWithSession",
208229 "component", "oauth/refresher",
209209- "did", did)
230230+ "did", did,
231231+ "success", err == nil)
210232211211- return session, nil
233233+ return err
212234}
213235214236// resumeSession loads a session from storage
+113-90
pkg/auth/token/servicetoken.go
···1515 "atcr.io/pkg/auth"
1616 "atcr.io/pkg/auth/oauth"
1717 "github.com/bluesky-social/indigo/atproto/atclient"
1818+ indigo_oauth "github.com/bluesky-social/indigo/atproto/auth/oauth"
1819)
19202021// getErrorHint provides context-specific troubleshooting hints based on API error type
···4748// GetOrFetchServiceToken gets a service token for hold authentication.
4849// Checks cache first, then fetches from PDS with OAuth/DPoP if needed.
4950// This is the canonical implementation used by both middleware and crew registration.
5151+//
5252+// IMPORTANT: Uses DoWithSession() to hold a per-DID lock through the entire PDS interaction.
5353+// This prevents DPoP nonce race conditions when multiple Docker layers upload concurrently.
5054func GetOrFetchServiceToken(
5155 ctx context.Context,
5256 refresher *oauth.Refresher,
···7478 slog.Debug("Service token expiring soon, proactively renewing", "did", did)
7579 }
76807777- session, err := refresher.GetSession(ctx, did)
7878- if err != nil {
7979- // OAuth session unavailable - fail
8080- InvalidateServiceToken(did, holdDID)
8181+ // Use DoWithSession to hold the lock through the entire PDS interaction.
8282+ // This prevents DPoP nonce races when multiple goroutines try to fetch service tokens.
8383+ var serviceToken string
8484+ var fetchErr error
81858282- // Try to extract detailed error information
8383- var apiErr *atclient.APIError
8484- if errors.As(err, &apiErr) {
8585- slog.Error("Failed to get OAuth session for service token",
8686- "component", "token/servicetoken",
8686+ err := refresher.DoWithSession(ctx, did, func(session *indigo_oauth.ClientSession) error {
8787+ // Double-check cache after acquiring lock - another goroutine may have
8888+ // populated it while we were waiting (classic double-checked locking pattern)
8989+ cachedToken, expiresAt := GetServiceToken(did, holdDID)
9090+ if cachedToken != "" && time.Until(expiresAt) > 10*time.Second {
9191+ slog.Debug("Service token cache hit after lock acquisition",
8792 "did", did,
8888- "holdDID", holdDID,
8989- "pdsEndpoint", pdsEndpoint,
9090- "error", err,
9191- "httpStatus", apiErr.StatusCode,
9292- "errorName", apiErr.Name,
9393- "errorMessage", apiErr.Message,
9494- "hint", getErrorHint(apiErr))
9595- } else {
9696- slog.Error("Failed to get OAuth session for service token",
9393+ "expiresIn", time.Until(expiresAt).Round(time.Second))
9494+ serviceToken = cachedToken
9595+ return nil
9696+ }
9797+9898+ // Cache still empty/expired - proceed with PDS call
9999+ // Request 5-minute expiry (PDS may grant less)
100100+ // exp must be absolute Unix timestamp, not relative duration
101101+ // Note: OAuth scope includes #atcr_hold fragment, but service auth aud must be bare DID
102102+ expiryTime := time.Now().Unix() + 300 // 5 minutes from now
103103+ serviceAuthURL := fmt.Sprintf("%s%s?aud=%s&lxm=%s&exp=%d",
104104+ pdsEndpoint,
105105+ atproto.ServerGetServiceAuth,
106106+ url.QueryEscape(holdDID),
107107+ url.QueryEscape("com.atproto.repo.getRecord"),
108108+ expiryTime,
109109+ )
110110+111111+ req, err := http.NewRequestWithContext(ctx, "GET", serviceAuthURL, nil)
112112+ if err != nil {
113113+ fetchErr = fmt.Errorf("failed to create service auth request: %w", err)
114114+ return fetchErr
115115+ }
116116+117117+ // Use OAuth session to authenticate to PDS (with DPoP)
118118+ // The lock is held, so DPoP nonce negotiation is serialized per-DID
119119+ resp, err := session.DoWithAuth(session.Client, req, "com.atproto.server.getServiceAuth")
120120+ if err != nil {
121121+ // Auth error - may indicate expired tokens or corrupted session
122122+ InvalidateServiceToken(did, holdDID)
123123+124124+ // Inspect the error to extract detailed information from indigo's APIError
125125+ var apiErr *atclient.APIError
126126+ if errors.As(err, &apiErr) {
127127+ // Log detailed API error information
128128+ slog.Error("OAuth authentication failed during service token request",
129129+ "component", "token/servicetoken",
130130+ "did", did,
131131+ "holdDID", holdDID,
132132+ "pdsEndpoint", pdsEndpoint,
133133+ "url", serviceAuthURL,
134134+ "error", err,
135135+ "httpStatus", apiErr.StatusCode,
136136+ "errorName", apiErr.Name,
137137+ "errorMessage", apiErr.Message,
138138+ "hint", getErrorHint(apiErr))
139139+ } else {
140140+ // Fallback for non-API errors (network errors, etc.)
141141+ slog.Error("OAuth authentication failed during service token request",
142142+ "component", "token/servicetoken",
143143+ "did", did,
144144+ "holdDID", holdDID,
145145+ "pdsEndpoint", pdsEndpoint,
146146+ "url", serviceAuthURL,
147147+ "error", err,
148148+ "errorType", fmt.Sprintf("%T", err),
149149+ "hint", "Network error or unexpected failure during OAuth request")
150150+ }
151151+152152+ fetchErr = fmt.Errorf("OAuth validation failed: %w", err)
153153+ return fetchErr
154154+ }
155155+ defer resp.Body.Close()
156156+157157+ if resp.StatusCode != http.StatusOK {
158158+ // Service auth failed
159159+ bodyBytes, _ := io.ReadAll(resp.Body)
160160+ InvalidateServiceToken(did, holdDID)
161161+ slog.Error("Service token request returned non-200 status",
97162 "component", "token/servicetoken",
98163 "did", did,
99164 "holdDID", holdDID,
100165 "pdsEndpoint", pdsEndpoint,
101101- "error", err,
102102- "errorType", fmt.Sprintf("%T", err),
103103- "hint", "OAuth session not found in database or token refresh failed")
166166+ "statusCode", resp.StatusCode,
167167+ "responseBody", string(bodyBytes),
168168+ "hint", "PDS rejected the service token request - check PDS logs for details")
169169+ fetchErr = fmt.Errorf("service auth failed with status %d: %s", resp.StatusCode, string(bodyBytes))
170170+ return fetchErr
104171 }
105172106106- // Delete the stale OAuth session to force re-authentication
107107- // This also invalidates the UI session automatically
108108- if delErr := refresher.DeleteSession(ctx, did); delErr != nil {
109109- slog.Warn("Failed to delete stale OAuth session",
110110- "component", "token/servicetoken",
111111- "did", did,
112112- "error", delErr)
173173+ // Parse response to get service token
174174+ var result struct {
175175+ Token string `json:"token"`
176176+ }
177177+ if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
178178+ fetchErr = fmt.Errorf("failed to decode service auth response: %w", err)
179179+ return fetchErr
113180 }
114181115115- return "", fmt.Errorf("failed to get OAuth session: %w", err)
116116- }
182182+ if result.Token == "" {
183183+ fetchErr = fmt.Errorf("empty token in service auth response")
184184+ return fetchErr
185185+ }
117186118118- // Call com.atproto.server.getServiceAuth on the user's PDS
119119- // Request 5-minute expiry (PDS may grant less)
120120- // exp must be absolute Unix timestamp, not relative duration
121121- // Note: OAuth scope includes #atcr_hold fragment, but service auth aud must be bare DID
122122- expiryTime := time.Now().Unix() + 300 // 5 minutes from now
123123- serviceAuthURL := fmt.Sprintf("%s%s?aud=%s&lxm=%s&exp=%d",
124124- pdsEndpoint,
125125- atproto.ServerGetServiceAuth,
126126- url.QueryEscape(holdDID),
127127- url.QueryEscape("com.atproto.repo.getRecord"),
128128- expiryTime,
129129- )
187187+ serviceToken = result.Token
188188+ return nil
189189+ })
130190131131- req, err := http.NewRequestWithContext(ctx, "GET", serviceAuthURL, nil)
132191 if err != nil {
133133- return "", fmt.Errorf("failed to create service auth request: %w", err)
134134- }
135135-136136- // Use OAuth session to authenticate to PDS (with DPoP)
137137- resp, err := session.DoWithAuth(session.Client, req, "com.atproto.server.getServiceAuth")
138138- if err != nil {
139139- // Auth error - may indicate expired tokens or corrupted session
192192+ // DoWithSession failed (session load or callback error)
140193 InvalidateServiceToken(did, holdDID)
141194142142- // Inspect the error to extract detailed information from indigo's APIError
195195+ // Try to extract detailed error information
143196 var apiErr *atclient.APIError
144197 if errors.As(err, &apiErr) {
145145- // Log detailed API error information
146146- slog.Error("OAuth authentication failed during service token request",
198198+ slog.Error("Failed to get OAuth session for service token",
147199 "component", "token/servicetoken",
148200 "did", did,
149201 "holdDID", holdDID,
150202 "pdsEndpoint", pdsEndpoint,
151151- "url", serviceAuthURL,
152203 "error", err,
153204 "httpStatus", apiErr.StatusCode,
154205 "errorName", apiErr.Name,
155206 "errorMessage", apiErr.Message,
156207 "hint", getErrorHint(apiErr))
157157- } else {
158158- // Fallback for non-API errors (network errors, etc.)
159159- slog.Error("OAuth authentication failed during service token request",
208208+ } else if fetchErr == nil {
209209+ // Session load failed (not a fetch error)
210210+ slog.Error("Failed to get OAuth session for service token",
160211 "component", "token/servicetoken",
161212 "did", did,
162213 "holdDID", holdDID,
163214 "pdsEndpoint", pdsEndpoint,
164164- "url", serviceAuthURL,
165215 "error", err,
166216 "errorType", fmt.Sprintf("%T", err),
167167- "hint", "Network error or unexpected failure during OAuth request")
217217+ "hint", "OAuth session not found in database or token refresh failed")
168218 }
169219170220 // Delete the stale OAuth session to force re-authentication
···176226 "error", delErr)
177227 }
178228179179- return "", fmt.Errorf("OAuth validation failed: %w", err)
180180- }
181181- defer resp.Body.Close()
182182-183183- if resp.StatusCode != http.StatusOK {
184184- // Service auth failed
185185- bodyBytes, _ := io.ReadAll(resp.Body)
186186- InvalidateServiceToken(did, holdDID)
187187- slog.Error("Service token request returned non-200 status",
188188- "component", "token/servicetoken",
189189- "did", did,
190190- "holdDID", holdDID,
191191- "pdsEndpoint", pdsEndpoint,
192192- "statusCode", resp.StatusCode,
193193- "responseBody", string(bodyBytes),
194194- "hint", "PDS rejected the service token request - check PDS logs for details")
195195- return "", fmt.Errorf("service auth failed with status %d: %s", resp.StatusCode, string(bodyBytes))
229229+ if fetchErr != nil {
230230+ return "", fetchErr
231231+ }
232232+ return "", fmt.Errorf("failed to get OAuth session: %w", err)
196233 }
197197-198198- // Parse response to get service token
199199- var result struct {
200200- Token string `json:"token"`
201201- }
202202- if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
203203- return "", fmt.Errorf("failed to decode service auth response: %w", err)
204204- }
205205-206206- if result.Token == "" {
207207- return "", fmt.Errorf("empty token in service auth response")
208208- }
209209-210210- serviceToken := result.Token
211234212235 // Cache the token (parses JWT to extract actual expiry)
213236 if err := SetServiceToken(did, holdDID, serviceToken); err != nil {