A container registry that uses the AT Protocol for manifest storage and S3 for blob storage. atcr.io
docker container atproto go
81
fork

Configure Feed

Select the types of activity you want to include in your feed.

missed new files for jetstream improvements

+1671 -3
+580
pkg/appview/db/batch.go
··· 1 + package db 2 + 3 + import ( 4 + "encoding/json" 5 + "fmt" 6 + "strings" 7 + "time" 8 + ) 9 + 10 + // BatchSize is the maximum number of rows included in a single multi-row INSERT. 11 + // Kept well under SQLite's default SQLITE_MAX_VARIABLE_NUMBER (32766) and any 12 + // remote libsql parameter ceiling — at 11 columns this is 1100 placeholders. 13 + const BatchSize = 100 14 + 15 + // buildPlaceholders returns a comma-separated list of `rows` groups of the form 16 + // `(?,?,?)`, each group containing `cols` placeholders. Used to construct the 17 + // VALUES clause of multi-row INSERT statements. 18 + func buildPlaceholders(rows, cols int) string { 19 + if rows <= 0 || cols <= 0 { 20 + return "" 21 + } 22 + group := "(" + strings.Repeat("?,", cols-1) + "?)" 23 + var sb strings.Builder 24 + sb.Grow((len(group) + 1) * rows) 25 + for i := 0; i < rows; i++ { 26 + if i > 0 { 27 + sb.WriteByte(',') 28 + } 29 + sb.WriteString(group) 30 + } 31 + return sb.String() 32 + } 33 + 34 + // chunk returns the half-open range [start, end) for the i-th chunk of size 35 + // BatchSize within a slice of length n. 36 + func chunk(n, i int) (start, end int) { 37 + start = i * BatchSize 38 + end = start + BatchSize 39 + if end > n { 40 + end = n 41 + } 42 + return start, end 43 + } 44 + 45 + // BatchInsertManifests upserts a batch of manifests and returns a map of 46 + // digest → manifest id for the inserted rows (both new and existing). Rows 47 + // are keyed by (did, repository, digest); callers that need the id must 48 + // group their input so that digest is unique per (did, repository) in one 49 + // batch call. 50 + // 51 + // Implementation: one multi-row INSERT per sub-batch, followed by one SELECT 52 + // to fetch ids back (libsql's RETURNING support across replica modes is 53 + // uneven; a second SELECT is reliable and still a single round-trip per 54 + // sub-batch). 
55 + func BatchInsertManifests(db DBTX, manifests []Manifest) (map[string]int64, error) { 56 + out := make(map[string]int64, len(manifests)) 57 + if len(manifests) == 0 { 58 + return out, nil 59 + } 60 + 61 + for i := 0; i*BatchSize < len(manifests); i++ { 62 + start, end := chunk(len(manifests), i) 63 + batch := manifests[start:end] 64 + 65 + const cols = 11 66 + args := make([]any, 0, len(batch)*cols) 67 + for _, m := range batch { 68 + args = append(args, 69 + m.DID, m.Repository, m.Digest, m.HoldEndpoint, 70 + m.SchemaVersion, m.MediaType, m.ConfigDigest, 71 + m.ConfigSize, m.ArtifactType, 72 + nullString(m.SubjectDigest), 73 + m.CreatedAt, 74 + ) 75 + } 76 + 77 + query := ` 78 + INSERT INTO manifests 79 + (did, repository, digest, hold_endpoint, schema_version, media_type, 80 + config_digest, config_size, artifact_type, subject_digest, created_at) 81 + VALUES ` + buildPlaceholders(len(batch), cols) + ` 82 + ON CONFLICT(did, repository, digest) DO UPDATE SET 83 + hold_endpoint = excluded.hold_endpoint, 84 + schema_version = excluded.schema_version, 85 + media_type = excluded.media_type, 86 + config_digest = excluded.config_digest, 87 + config_size = excluded.config_size, 88 + artifact_type = excluded.artifact_type, 89 + subject_digest = excluded.subject_digest 90 + WHERE excluded.hold_endpoint != manifests.hold_endpoint 91 + OR excluded.schema_version != manifests.schema_version 92 + OR excluded.media_type != manifests.media_type 93 + OR excluded.config_digest IS NOT manifests.config_digest 94 + OR excluded.config_size IS NOT manifests.config_size 95 + OR excluded.artifact_type != manifests.artifact_type 96 + OR excluded.subject_digest IS NOT manifests.subject_digest 97 + ` 98 + if _, err := db.Exec(query, args...); err != nil { 99 + return nil, fmt.Errorf("batch insert manifests: %w", err) 100 + } 101 + 102 + // Fetch ids for this sub-batch by (did, digest) — digests are unique enough 103 + // that matching on (did, digest) avoids needing a three-column IN 
list. 104 + // repository is included in the row to disambiguate if a user genuinely has 105 + // the same digest across repos. 106 + selectArgs := make([]any, 0, 1+2*len(batch)) 107 + // Group by did (caller usually supplies one did per call, but be safe). 108 + didSet := make(map[string]struct{}) 109 + for _, m := range batch { 110 + didSet[m.DID] = struct{}{} 111 + } 112 + // Build a per-did IN (?) query; usually exactly one iteration. 113 + for did := range didSet { 114 + digests := make([]string, 0, len(batch)) 115 + for _, m := range batch { 116 + if m.DID == did { 117 + digests = append(digests, m.Digest) 118 + } 119 + } 120 + selectArgs = append(selectArgs[:0], did) 121 + for _, d := range digests { 122 + selectArgs = append(selectArgs, d) 123 + } 124 + selectQuery := ` 125 + SELECT repository, digest, id FROM manifests 126 + WHERE did = ? AND digest IN (` + 127 + strings.TrimSuffix(strings.Repeat("?,", len(digests)), ",") + `) 128 + ` 129 + rows, err := db.Query(selectQuery, selectArgs...) 130 + if err != nil { 131 + return nil, fmt.Errorf("batch select manifest ids: %w", err) 132 + } 133 + for rows.Next() { 134 + var repo, digest string 135 + var id int64 136 + if err := rows.Scan(&repo, &digest, &id); err != nil { 137 + rows.Close() 138 + return nil, fmt.Errorf("scan manifest id: %w", err) 139 + } 140 + // Key format matches what callers use: "did|repo|digest". 141 + out[manifestKey(did, repo, digest)] = id 142 + } 143 + rows.Close() 144 + } 145 + } 146 + return out, nil 147 + } 148 + 149 + // ManifestKey builds the lookup key used by BatchInsertManifests' result map. 150 + // Callers construct the same key from their in-memory Manifest structs to 151 + // find the assigned id. 
152 + func ManifestKey(did, repository, digest string) string { 153 + return manifestKey(did, repository, digest) 154 + } 155 + 156 + func manifestKey(did, repository, digest string) string { 157 + return did + "|" + repository + "|" + digest 158 + } 159 + 160 + // BatchInsertLayers inserts a batch of layers, skipping any that already exist. 161 + // Layers are immutable, so ON CONFLICT DO NOTHING matches the single-row 162 + // InsertLayer semantics. 163 + func BatchInsertLayers(db DBTX, layers []Layer) error { 164 + if len(layers) == 0 { 165 + return nil 166 + } 167 + for i := 0; i*BatchSize < len(layers); i++ { 168 + start, end := chunk(len(layers), i) 169 + batch := layers[start:end] 170 + 171 + const cols = 6 172 + args := make([]any, 0, len(batch)*cols) 173 + for _, l := range batch { 174 + var annotationsJSON any 175 + if len(l.Annotations) > 0 { 176 + b, err := json.Marshal(l.Annotations) 177 + if err != nil { 178 + return fmt.Errorf("marshal layer annotations: %w", err) 179 + } 180 + s := string(b) 181 + annotationsJSON = &s 182 + } 183 + args = append(args, l.ManifestID, l.Digest, l.Size, l.MediaType, l.LayerIndex, annotationsJSON) 184 + } 185 + 186 + query := ` 187 + INSERT INTO layers (manifest_id, digest, size, media_type, layer_index, annotations) 188 + VALUES ` + buildPlaceholders(len(batch), cols) + ` 189 + ON CONFLICT(manifest_id, layer_index) DO NOTHING 190 + ` 191 + if _, err := db.Exec(query, args...); err != nil { 192 + return fmt.Errorf("batch insert layers: %w", err) 193 + } 194 + } 195 + return nil 196 + } 197 + 198 + // BatchInsertManifestReferences inserts a batch of manifest references. 199 + // The table has PRIMARY KEY(manifest_id, reference_index); duplicates skip. 
200 + func BatchInsertManifestReferences(db DBTX, refs []ManifestReference) error { 201 + if len(refs) == 0 { 202 + return nil 203 + } 204 + for i := 0; i*BatchSize < len(refs); i++ { 205 + start, end := chunk(len(refs), i) 206 + batch := refs[start:end] 207 + 208 + const cols = 10 209 + args := make([]any, 0, len(batch)*cols) 210 + for _, r := range batch { 211 + args = append(args, 212 + r.ManifestID, r.Digest, r.Size, r.MediaType, 213 + r.PlatformArchitecture, r.PlatformOS, 214 + r.PlatformVariant, r.PlatformOSVersion, 215 + r.IsAttestation, r.ReferenceIndex, 216 + ) 217 + } 218 + 219 + query := ` 220 + INSERT INTO manifest_references (manifest_id, digest, size, media_type, 221 + platform_architecture, platform_os, 222 + platform_variant, platform_os_version, 223 + is_attestation, reference_index) 224 + VALUES ` + buildPlaceholders(len(batch), cols) + ` 225 + ON CONFLICT(manifest_id, reference_index) DO NOTHING 226 + ` 227 + if _, err := db.Exec(query, args...); err != nil { 228 + return fmt.Errorf("batch insert manifest references: %w", err) 229 + } 230 + } 231 + return nil 232 + } 233 + 234 + // BatchUpsertTags upserts a batch of tag records, matching UpsertTag semantics. 
235 + func BatchUpsertTags(db DBTX, tags []Tag) error { 236 + if len(tags) == 0 { 237 + return nil 238 + } 239 + for i := 0; i*BatchSize < len(tags); i++ { 240 + start, end := chunk(len(tags), i) 241 + batch := tags[start:end] 242 + 243 + const cols = 5 244 + args := make([]any, 0, len(batch)*cols) 245 + for _, t := range batch { 246 + args = append(args, t.DID, t.Repository, t.Tag, t.Digest, t.CreatedAt) 247 + } 248 + 249 + query := ` 250 + INSERT INTO tags (did, repository, tag, digest, created_at) 251 + VALUES ` + buildPlaceholders(len(batch), cols) + ` 252 + ON CONFLICT(did, repository, tag) DO UPDATE SET 253 + digest = excluded.digest, 254 + created_at = excluded.created_at 255 + WHERE excluded.digest != tags.digest 256 + OR excluded.created_at != tags.created_at 257 + ` 258 + if _, err := db.Exec(query, args...); err != nil { 259 + return fmt.Errorf("batch upsert tags: %w", err) 260 + } 261 + } 262 + return nil 263 + } 264 + 265 + // StarInput is a struct projection of the UpsertStar argument list for use with BatchUpsertStars. 266 + type StarInput struct { 267 + StarrerDID string 268 + OwnerDID string 269 + Repository string 270 + CreatedAt time.Time 271 + } 272 + 273 + // BatchUpsertStars upserts a batch of stars. Stars are immutable. 
274 + func BatchUpsertStars(db DBTX, stars []StarInput) error { 275 + if len(stars) == 0 { 276 + return nil 277 + } 278 + for i := 0; i*BatchSize < len(stars); i++ { 279 + start, end := chunk(len(stars), i) 280 + batch := stars[start:end] 281 + 282 + const cols = 4 283 + args := make([]any, 0, len(batch)*cols) 284 + for _, s := range batch { 285 + args = append(args, s.StarrerDID, s.OwnerDID, s.Repository, s.CreatedAt) 286 + } 287 + 288 + query := ` 289 + INSERT INTO stars (starrer_did, owner_did, repository, created_at) 290 + VALUES ` + buildPlaceholders(len(batch), cols) + ` 291 + ON CONFLICT(starrer_did, owner_did, repository) DO NOTHING 292 + ` 293 + if _, err := db.Exec(query, args...); err != nil { 294 + return fmt.Errorf("batch upsert stars: %w", err) 295 + } 296 + } 297 + return nil 298 + } 299 + 300 + // BatchUpsertRepoPages upserts a batch of repo page records. 301 + func BatchUpsertRepoPages(db DBTX, pages []RepoPage) error { 302 + if len(pages) == 0 { 303 + return nil 304 + } 305 + for i := 0; i*BatchSize < len(pages); i++ { 306 + start, end := chunk(len(pages), i) 307 + batch := pages[start:end] 308 + 309 + const cols = 7 310 + args := make([]any, 0, len(batch)*cols) 311 + for _, p := range batch { 312 + args = append(args, 313 + p.DID, p.Repository, p.Description, p.AvatarCID, 314 + p.UserEdited, p.CreatedAt, p.UpdatedAt, 315 + ) 316 + } 317 + 318 + query := ` 319 + INSERT INTO repo_pages (did, repository, description, avatar_cid, user_edited, created_at, updated_at) 320 + VALUES ` + buildPlaceholders(len(batch), cols) + ` 321 + ON CONFLICT(did, repository) DO UPDATE SET 322 + description = excluded.description, 323 + avatar_cid = excluded.avatar_cid, 324 + user_edited = excluded.user_edited, 325 + updated_at = excluded.updated_at 326 + WHERE excluded.description IS NOT repo_pages.description 327 + OR excluded.avatar_cid IS NOT repo_pages.avatar_cid 328 + OR excluded.user_edited IS NOT repo_pages.user_edited 329 + ` 330 + if _, err := db.Exec(query, 
args...); err != nil { 331 + return fmt.Errorf("batch upsert repo pages: %w", err) 332 + } 333 + } 334 + return nil 335 + } 336 + 337 + // BatchUpsertDailyStats upserts a batch of daily stats rows. 338 + func BatchUpsertDailyStats(db DBTX, stats []DailyStats) error { 339 + if len(stats) == 0 { 340 + return nil 341 + } 342 + for i := 0; i*BatchSize < len(stats); i++ { 343 + start, end := chunk(len(stats), i) 344 + batch := stats[start:end] 345 + 346 + const cols = 5 347 + args := make([]any, 0, len(batch)*cols) 348 + for _, s := range batch { 349 + args = append(args, s.DID, s.Repository, s.Date, s.PullCount, s.PushCount) 350 + } 351 + 352 + query := ` 353 + INSERT INTO repository_stats_daily (did, repository, date, pull_count, push_count) 354 + VALUES ` + buildPlaceholders(len(batch), cols) + ` 355 + ON CONFLICT(did, repository, date) DO UPDATE SET 356 + pull_count = excluded.pull_count, 357 + push_count = excluded.push_count 358 + WHERE excluded.pull_count != repository_stats_daily.pull_count 359 + OR excluded.push_count != repository_stats_daily.push_count 360 + ` 361 + if _, err := db.Exec(query, args...); err != nil { 362 + return fmt.Errorf("batch upsert daily stats: %w", err) 363 + } 364 + } 365 + return nil 366 + } 367 + 368 + // BatchUpsertRepositoryStats upserts aggregated repository stats. 
369 + func BatchUpsertRepositoryStats(db DBTX, stats []RepositoryStats) error { 370 + if len(stats) == 0 { 371 + return nil 372 + } 373 + for i := 0; i*BatchSize < len(stats); i++ { 374 + start, end := chunk(len(stats), i) 375 + batch := stats[start:end] 376 + 377 + const cols = 6 378 + args := make([]any, 0, len(batch)*cols) 379 + for _, s := range batch { 380 + args = append(args, 381 + s.DID, s.Repository, s.PullCount, s.LastPull, s.PushCount, s.LastPush, 382 + ) 383 + } 384 + 385 + query := ` 386 + INSERT INTO repository_stats (did, repository, pull_count, last_pull, push_count, last_push) 387 + VALUES ` + buildPlaceholders(len(batch), cols) + ` 388 + ON CONFLICT(did, repository) DO UPDATE SET 389 + pull_count = excluded.pull_count, 390 + last_pull = excluded.last_pull, 391 + push_count = excluded.push_count, 392 + last_push = excluded.last_push 393 + WHERE excluded.pull_count != repository_stats.pull_count 394 + OR excluded.last_pull IS NOT repository_stats.last_pull 395 + OR excluded.push_count != repository_stats.push_count 396 + OR excluded.last_push IS NOT repository_stats.last_push 397 + ` 398 + if _, err := db.Exec(query, args...); err != nil { 399 + return fmt.Errorf("batch upsert repository stats: %w", err) 400 + } 401 + } 402 + return nil 403 + } 404 + 405 + // BatchUpsertCaptainRecords upserts a batch of captain records. 
406 + func BatchUpsertCaptainRecords(db DBTX, records []HoldCaptainRecord) error { 407 + if len(records) == 0 { 408 + return nil 409 + } 410 + for i := 0; i*BatchSize < len(records); i++ { 411 + start, end := chunk(len(records), i) 412 + batch := records[start:end] 413 + 414 + const cols = 8 415 + args := make([]any, 0, len(batch)*cols) 416 + for _, r := range batch { 417 + args = append(args, 418 + r.HoldDID, r.OwnerDID, r.Public, r.AllowAllCrew, 419 + nullString(r.DeployedAt), 420 + nullString(r.Region), 421 + nullString(r.Successor), 422 + r.UpdatedAt, 423 + ) 424 + } 425 + 426 + query := ` 427 + INSERT INTO hold_captain_records ( 428 + hold_did, owner_did, public, allow_all_crew, 429 + deployed_at, region, successor, updated_at 430 + ) VALUES ` + buildPlaceholders(len(batch), cols) + ` 431 + ON CONFLICT(hold_did) DO UPDATE SET 432 + owner_did = excluded.owner_did, 433 + public = excluded.public, 434 + allow_all_crew = excluded.allow_all_crew, 435 + deployed_at = excluded.deployed_at, 436 + region = excluded.region, 437 + successor = excluded.successor, 438 + updated_at = excluded.updated_at 439 + WHERE excluded.owner_did != hold_captain_records.owner_did 440 + OR excluded.public != hold_captain_records.public 441 + OR excluded.allow_all_crew != hold_captain_records.allow_all_crew 442 + OR excluded.deployed_at IS NOT hold_captain_records.deployed_at 443 + OR excluded.region IS NOT hold_captain_records.region 444 + OR excluded.successor IS NOT hold_captain_records.successor 445 + ` 446 + if _, err := db.Exec(query, args...); err != nil { 447 + return fmt.Errorf("batch upsert captain records: %w", err) 448 + } 449 + } 450 + return nil 451 + } 452 + 453 + // BatchUpsertCrewMembers upserts a batch of crew members. 
454 + func BatchUpsertCrewMembers(db DBTX, members []CrewMember) error { 455 + if len(members) == 0 { 456 + return nil 457 + } 458 + for i := 0; i*BatchSize < len(members); i++ { 459 + start, end := chunk(len(members), i) 460 + batch := members[start:end] 461 + 462 + // updated_at uses CURRENT_TIMESTAMP literal, so it's not a placeholder. 463 + const cols = 7 464 + args := make([]any, 0, len(batch)*cols) 465 + for _, m := range batch { 466 + args = append(args, 467 + m.HoldDID, m.MemberDID, m.Rkey, 468 + nullString(m.Role), 469 + nullString(m.Permissions), 470 + nullString(m.Tier), 471 + nullString(m.AddedAt), 472 + ) 473 + } 474 + 475 + // Replace each group with `(?,?,?,?,?,?,?,CURRENT_TIMESTAMP)` — we build it 476 + // manually because buildPlaceholders only handles uniform placeholders. 477 + group := "(" + strings.Repeat("?,", cols) + "CURRENT_TIMESTAMP)" 478 + var sb strings.Builder 479 + sb.Grow((len(group) + 1) * len(batch)) 480 + for i := 0; i < len(batch); i++ { 481 + if i > 0 { 482 + sb.WriteByte(',') 483 + } 484 + sb.WriteString(group) 485 + } 486 + 487 + query := ` 488 + INSERT INTO hold_crew_members ( 489 + hold_did, member_did, rkey, role, permissions, tier, added_at, updated_at 490 + ) VALUES ` + sb.String() + ` 491 + ON CONFLICT(hold_did, member_did) DO UPDATE SET 492 + rkey = excluded.rkey, 493 + role = excluded.role, 494 + permissions = excluded.permissions, 495 + tier = excluded.tier, 496 + added_at = excluded.added_at, 497 + updated_at = CURRENT_TIMESTAMP 498 + WHERE excluded.rkey != hold_crew_members.rkey 499 + OR excluded.role IS NOT hold_crew_members.role 500 + OR excluded.permissions IS NOT hold_crew_members.permissions 501 + OR excluded.tier IS NOT hold_crew_members.tier 502 + OR excluded.added_at IS NOT hold_crew_members.added_at 503 + ` 504 + if _, err := db.Exec(query, args...); err != nil { 505 + return fmt.Errorf("batch upsert crew members: %w", err) 506 + } 507 + } 508 + return nil 509 + } 510 + 511 + // AnnotationRow represents a 
single key/value annotation for a repository, 512 + // used by BatchUpsertRepositoryAnnotations. 513 + type AnnotationRow struct { 514 + DID string 515 + Repository string 516 + Key string 517 + Value string 518 + } 519 + 520 + // BatchUpsertRepositoryAnnotations upserts annotation rows and deletes any 521 + // stale keys for each (did, repository) represented in the input. The caller 522 + // is responsible for pre-filtering: rows should represent only repositories 523 + // whose newest manifest has at least one non-empty annotation, matching the 524 + // single-row UpsertRepositoryAnnotations semantics. 525 + func BatchUpsertRepositoryAnnotations(db DBTX, rows []AnnotationRow) error { 526 + if len(rows) == 0 { 527 + return nil 528 + } 529 + 530 + // Group rows by (did, repository) so we can delete stale keys per repo. 531 + type repoKey struct{ did, repo string } 532 + keysByRepo := make(map[repoKey][]string) 533 + for _, r := range rows { 534 + k := repoKey{r.DID, r.Repository} 535 + keysByRepo[k] = append(keysByRepo[k], r.Key) 536 + } 537 + 538 + // Delete stale keys per repository in one statement each. We could batch 539 + // further with OR chains, but DELETE is cheap and each repo has few keys. 540 + for k, keys := range keysByRepo { 541 + placeholders := strings.TrimSuffix(strings.Repeat("?,", len(keys)), ",") 542 + args := make([]any, 0, 2+len(keys)) 543 + args = append(args, k.did, k.repo) 544 + for _, key := range keys { 545 + args = append(args, key) 546 + } 547 + if _, err := db.Exec(` 548 + DELETE FROM repository_annotations 549 + WHERE did = ? AND repository = ? AND key NOT IN (`+placeholders+`) 550 + `, args...); err != nil { 551 + return fmt.Errorf("batch delete stale annotations: %w", err) 552 + } 553 + } 554 + 555 + // Upsert all annotation rows in sub-batches. 
556 + now := time.Now() 557 + for i := 0; i*BatchSize < len(rows); i++ { 558 + start, end := chunk(len(rows), i) 559 + batch := rows[start:end] 560 + 561 + const cols = 5 562 + args := make([]any, 0, len(batch)*cols) 563 + for _, r := range batch { 564 + args = append(args, r.DID, r.Repository, r.Key, r.Value, now) 565 + } 566 + 567 + query := ` 568 + INSERT INTO repository_annotations (did, repository, key, value, updated_at) 569 + VALUES ` + buildPlaceholders(len(batch), cols) + ` 570 + ON CONFLICT(did, repository, key) DO UPDATE SET 571 + value = excluded.value, 572 + updated_at = excluded.updated_at 573 + WHERE excluded.value != repository_annotations.value 574 + ` 575 + if _, err := db.Exec(query, args...); err != nil { 576 + return fmt.Errorf("batch upsert annotations: %w", err) 577 + } 578 + } 579 + return nil 580 + }
+383
pkg/appview/db/batch_test.go
package db

import (
	"database/sql"
	"fmt"
	"strings"
	"testing"
	"time"
)

// setupBatchTestDB spins up a fresh in-memory libsql database with the full
// schema applied, so every batch test can write realistic data without
// stubbing individual tables. The database name is derived from the test name
// (with "/" replaced by "_"), and the handle is closed via t.Cleanup.
func setupBatchTestDB(t *testing.T) *sql.DB {
	t.Helper()
	safeName := strings.ReplaceAll(t.Name(), "/", "_")
	d, err := InitDB(fmt.Sprintf("file:%s?mode=memory&cache=shared", safeName), LibsqlConfig{})
	if err != nil {
		t.Fatalf("init db: %v", err)
	}
	// Single conn to avoid cross-test contention in the shared in-memory cache.
	d.SetMaxOpenConns(1)
	t.Cleanup(func() { d.Close() })
	return d
}

// createBatchTestUser seeds a users row for did (handle "<did>.bsky.social").
// INSERT OR IGNORE makes repeated calls for the same did harmless.
func createBatchTestUser(t *testing.T, d *sql.DB, did string) {
	t.Helper()
	_, err := d.Exec(`
		INSERT OR IGNORE INTO users (did, handle, pds_endpoint, last_seen)
		VALUES (?, ?, ?, datetime('now'))
	`, did, did+".bsky.social", "https://pds.example.com")
	if err != nil {
		t.Fatalf("seed user: %v", err)
	}
}

// countRows runs a query that yields a single integer (typically
// SELECT COUNT(*)) and returns it, failing the test on any error.
func countRows(t *testing.T, d *sql.DB, query string, args ...any) int {
	t.Helper()
	var n int
	if err := d.QueryRow(query, args...).Scan(&n); err != nil {
		t.Fatalf("count: %v", err)
	}
	return n
}

// TestBuildPlaceholders checks the generated VALUES groups for several
// row/column combinations, including the zero-row and zero-column cases that
// must yield the empty string.
func TestBuildPlaceholders(t *testing.T) {
	cases := []struct {
		rows, cols int
		want       string
	}{
		{1, 1, "(?)"},
		{2, 1, "(?),(?)"},
		{1, 3, "(?,?,?)"},
		{3, 2, "(?,?),(?,?),(?,?)"},
		{0, 5, ""},
		{5, 0, ""},
	}
	for _, c := range cases {
		got := buildPlaceholders(c.rows, c.cols)
		if got != c.want {
			t.Errorf("buildPlaceholders(%d,%d) = %q, want %q", c.rows, c.cols, got, c.want)
		}
	}
}

// TestBatchInsertManifests_InsertsAndReturnsIDs inserts two manifests in
// different repositories and verifies that both rows exist and that a
// non-zero id is returned under each ManifestKey.
func TestBatchInsertManifests_InsertsAndReturnsIDs(t *testing.T) {
	d := setupBatchTestDB(t)
	createBatchTestUser(t, d, "did:plc:alice")

	now := time.Now()
	manifests := []Manifest{
		{DID: "did:plc:alice", Repository: "app1", Digest: "sha256:aaa", HoldEndpoint: "did:web:hold", SchemaVersion: 2, MediaType: "application/vnd.oci.image.manifest.v1+json", ArtifactType: "container-image", CreatedAt: now},
		{DID: "did:plc:alice", Repository: "app2", Digest: "sha256:bbb", HoldEndpoint: "did:web:hold", SchemaVersion: 2, MediaType: "application/vnd.oci.image.manifest.v1+json", ArtifactType: "container-image", CreatedAt: now},
	}

	ids, err := BatchInsertManifests(d, manifests)
	if err != nil {
		t.Fatalf("batch insert: %v", err)
	}
	if len(ids) != 2 {
		t.Fatalf("expected 2 ids, got %d", len(ids))
	}
	if ids[ManifestKey("did:plc:alice", "app1", "sha256:aaa")] == 0 {
		t.Errorf("missing id for app1")
	}
	if ids[ManifestKey("did:plc:alice", "app2", "sha256:bbb")] == 0 {
		t.Errorf("missing id for app2")
	}
	if got := countRows(t, d, `SELECT COUNT(*) FROM manifests`); got != 2 {
		t.Errorf("row count = %d, want 2", got)
	}
}

// TestBatchInsertManifests_Idempotent inserts the same manifest twice and
// expects exactly one row afterwards.
func TestBatchInsertManifests_Idempotent(t *testing.T) {
	d := setupBatchTestDB(t)
	createBatchTestUser(t, d, "did:plc:alice")

	now := time.Now()
	m := []Manifest{{
		DID: "did:plc:alice", Repository: "app", Digest: "sha256:aaa",
		HoldEndpoint: "did:web:hold", SchemaVersion: 2,
		MediaType:    "application/vnd.oci.image.manifest.v1+json",
		ArtifactType: "container-image", CreatedAt: now,
	}}
	if _, err := BatchInsertManifests(d, m); err != nil {
		t.Fatalf("first insert: %v", err)
	}
	if _, err := BatchInsertManifests(d, m); err != nil {
		t.Fatalf("second insert: %v", err)
	}
	if got := countRows(t, d, `SELECT COUNT(*) FROM manifests`); got != 1 {
		t.Errorf("expected idempotent; row count = %d", got)
	}
}

// TestBatchInsertManifests_Chunking inserts BatchSize+17 manifests so that
// the input spans more than one sub-batch, then checks that every manifest
// got an id and a row.
func TestBatchInsertManifests_Chunking(t *testing.T) {
	// Exceed one sub-batch to exercise the chunk loop.
	d := setupBatchTestDB(t)
	createBatchTestUser(t, d, "did:plc:alice")

	const n = BatchSize + 17
	now := time.Now()
	manifests := make([]Manifest, n)
	for i := 0; i < n; i++ {
		manifests[i] = Manifest{
			DID: "did:plc:alice", Repository: "app", Digest: fmt.Sprintf("sha256:%04d", i),
			HoldEndpoint: "did:web:hold", SchemaVersion: 2,
			MediaType:    "application/vnd.oci.image.manifest.v1+json",
			ArtifactType: "container-image", CreatedAt: now,
		}
	}
	ids, err := BatchInsertManifests(d, manifests)
	if err != nil {
		t.Fatalf("batch insert: %v", err)
	}
	if len(ids) != n {
		t.Errorf("ids len = %d, want %d", len(ids), n)
	}
	if got := countRows(t, d, `SELECT COUNT(*) FROM manifests`); got != n {
		t.Errorf("row count = %d, want %d", got, n)
	}
}

// TestBatchInsertLayers_RespectsFK inserts a manifest, attaches two layers to
// the returned manifest id, and confirms that re-inserting the same layers
// neither errors nor adds rows.
func TestBatchInsertLayers_RespectsFK(t *testing.T) {
	d := setupBatchTestDB(t)
	createBatchTestUser(t, d, "did:plc:alice")

	now := time.Now()
	ids, err := BatchInsertManifests(d, []Manifest{{
		DID: "did:plc:alice", Repository: "app", Digest: "sha256:aaa",
		HoldEndpoint: "did:web:hold", SchemaVersion: 2,
		MediaType:    "application/vnd.oci.image.manifest.v1+json",
		ArtifactType: "container-image", CreatedAt: now,
	}})
	if err != nil {
		t.Fatalf("insert manifest: %v", err)
	}
	mid := ids[ManifestKey("did:plc:alice", "app", "sha256:aaa")]

	layers := []Layer{
		{ManifestID: mid, Digest: "sha256:L0", Size: 100, MediaType: "application/vnd.oci.image.layer.v1.tar+gzip", LayerIndex: 0},
		{ManifestID: mid, Digest: "sha256:L1", Size: 200, MediaType: "application/vnd.oci.image.layer.v1.tar+gzip", LayerIndex: 1},
	}
	if err := BatchInsertLayers(d, layers); err != nil {
		t.Fatalf("batch insert layers: %v", err)
	}
	if got := countRows(t, d, `SELECT COUNT(*) FROM layers`); got != 2 {
		t.Errorf("layers count = %d, want 2", got)
	}
	// Re-run to confirm ON CONFLICT DO NOTHING doesn't error.
	if err := BatchInsertLayers(d, layers); err != nil {
		t.Fatalf("idempotent layers: %v", err)
	}
	if got := countRows(t, d, `SELECT COUNT(*) FROM layers`); got != 2 {
		t.Errorf("layers after re-insert = %d, want 2", got)
	}
}

// TestBatchUpsertTags_Idempotent upserts the same two tags twice and expects
// two rows in total.
func TestBatchUpsertTags_Idempotent(t *testing.T) {
	d := setupBatchTestDB(t)
	createBatchTestUser(t, d, "did:plc:alice")

	now := time.Now()
	tags := []Tag{
		{DID: "did:plc:alice", Repository: "app", Tag: "v1", Digest: "sha256:aaa", CreatedAt: now},
		{DID: "did:plc:alice", Repository: "app", Tag: "v2", Digest: "sha256:bbb", CreatedAt: now},
	}
	if err := BatchUpsertTags(d, tags); err != nil {
		t.Fatalf("batch upsert: %v", err)
	}
	if err := BatchUpsertTags(d, tags); err != nil {
		t.Fatalf("rerun: %v", err)
	}
	if got := countRows(t, d, `SELECT COUNT(*) FROM tags`); got != 2 {
		t.Errorf("tags count = %d, want 2", got)
	}
}

// TestBatchUpsertStars stars one repository twice with the same starrer and
// expects a single row.
func TestBatchUpsertStars(t *testing.T) {
	d := setupBatchTestDB(t)
	createBatchTestUser(t, d, "did:plc:alice")
	createBatchTestUser(t, d, "did:plc:bob")

	now := time.Now()
	stars := []StarInput{
		{StarrerDID: "did:plc:bob", OwnerDID: "did:plc:alice", Repository: "app", CreatedAt: now},
	}
	if err := BatchUpsertStars(d, stars); err != nil {
		t.Fatalf("batch upsert stars: %v", err)
	}
	// Re-insert to confirm ON CONFLICT DO NOTHING.
	if err := BatchUpsertStars(d, stars); err != nil {
		t.Fatalf("rerun: %v", err)
	}
	if got := countRows(t, d, `SELECT COUNT(*) FROM stars`); got != 1 {
		t.Errorf("stars count = %d, want 1", got)
	}
}

// TestBatchUpsertRepoPages upserts a page, changes its description, upserts
// again, and verifies that the stored description was replaced.
func TestBatchUpsertRepoPages(t *testing.T) {
	d := setupBatchTestDB(t)
	createBatchTestUser(t, d, "did:plc:alice")

	now := time.Now()
	pages := []RepoPage{
		{DID: "did:plc:alice", Repository: "app", Description: "desc", CreatedAt: now, UpdatedAt: now},
	}
	if err := BatchUpsertRepoPages(d, pages); err != nil {
		t.Fatalf("batch upsert: %v", err)
	}
	// Update with new description.
	pages[0].Description = "new desc"
	if err := BatchUpsertRepoPages(d, pages); err != nil {
		t.Fatalf("update: %v", err)
	}
	var desc string
	if err := d.QueryRow(`SELECT description FROM repo_pages WHERE did=? AND repository=?`,
		"did:plc:alice", "app").Scan(&desc); err != nil {
		t.Fatalf("select: %v", err)
	}
	if desc != "new desc" {
		t.Errorf("description = %q, want %q", desc, "new desc")
	}
}

// TestBatchUpsertDailyStats upserts a daily row, raises its pull count,
// upserts again, and verifies that the stored count is the new value.
func TestBatchUpsertDailyStats(t *testing.T) {
	d := setupBatchTestDB(t)
	createBatchTestUser(t, d, "did:plc:alice")

	stats := []DailyStats{
		{DID: "did:plc:alice", Repository: "app", Date: "2026-04-19", PullCount: 5, PushCount: 2},
	}
	if err := BatchUpsertDailyStats(d, stats); err != nil {
		t.Fatalf("upsert: %v", err)
	}
	stats[0].PullCount = 10
	if err := BatchUpsertDailyStats(d, stats); err != nil {
		t.Fatalf("update: %v", err)
	}
	var pull int
	if err := d.QueryRow(`SELECT pull_count FROM repository_stats_daily WHERE did=? AND repository=? AND date=?`,
		"did:plc:alice", "app", "2026-04-19").Scan(&pull); err != nil {
		t.Fatalf("select: %v", err)
	}
	if pull != 10 {
		t.Errorf("pull = %d, want 10", pull)
	}
}

// TestBatchUpsertRepositoryAnnotations_DropsStaleKeys writes keys "a" and
// "b", then writes only "a" with a new value, and verifies that "b" was
// deleted and "a" was updated.
func TestBatchUpsertRepositoryAnnotations_DropsStaleKeys(t *testing.T) {
	d := setupBatchTestDB(t)
	createBatchTestUser(t, d, "did:plc:alice")

	rows := []AnnotationRow{
		{DID: "did:plc:alice", Repository: "app", Key: "a", Value: "1"},
		{DID: "did:plc:alice", Repository: "app", Key: "b", Value: "2"},
	}
	if err := BatchUpsertRepositoryAnnotations(d, rows); err != nil {
		t.Fatalf("initial: %v", err)
	}
	if got := countRows(t, d, `SELECT COUNT(*) FROM repository_annotations WHERE did=? AND repository=?`,
		"did:plc:alice", "app"); got != 2 {
		t.Errorf("initial count = %d, want 2", got)
	}

	// Second call drops stale key "b".
	rows = []AnnotationRow{
		{DID: "did:plc:alice", Repository: "app", Key: "a", Value: "1-updated"},
	}
	if err := BatchUpsertRepositoryAnnotations(d, rows); err != nil {
		t.Fatalf("update: %v", err)
	}
	if got := countRows(t, d, `SELECT COUNT(*) FROM repository_annotations WHERE did=? AND repository=?`,
		"did:plc:alice", "app"); got != 1 {
		t.Errorf("after update = %d, want 1", got)
	}
	var val string
	if err := d.QueryRow(`SELECT value FROM repository_annotations WHERE key=? AND did=? AND repository=?`,
		"a", "did:plc:alice", "app").Scan(&val); err != nil {
		t.Fatalf("select: %v", err)
	}
	if val != "1-updated" {
		t.Errorf("value = %q, want 1-updated", val)
	}
}

// TestBatchUpsertCaptainRecords upserts a single captain record and expects
// one row. No users row is seeded for this test.
func TestBatchUpsertCaptainRecords(t *testing.T) {
	d := setupBatchTestDB(t)

	now := time.Now()
	records := []HoldCaptainRecord{
		{HoldDID: "did:web:hold1", OwnerDID: "did:plc:alice", Public: true, AllowAllCrew: false, UpdatedAt: now},
	}
	if err := BatchUpsertCaptainRecords(d, records); err != nil {
		t.Fatalf("upsert: %v", err)
	}
	if got := countRows(t, d, `SELECT COUNT(*) FROM hold_captain_records`); got != 1 {
		t.Errorf("count = %d, want 1", got)
	}
}

// TestBatchUpsertCrewMembers upserts a crew member, changes its rkey,
// upserts again, and verifies that the stored rkey was replaced.
func TestBatchUpsertCrewMembers(t *testing.T) {
	d := setupBatchTestDB(t)

	members := []CrewMember{
		{HoldDID: "did:web:hold1", MemberDID: "did:plc:alice", Rkey: "rkey1", Role: "owner"},
	}
	if err := BatchUpsertCrewMembers(d, members); err != nil {
		t.Fatalf("upsert: %v", err)
	}
	// Update the rkey: triggers the ON CONFLICT path.
	members[0].Rkey = "rkey2"
	if err := BatchUpsertCrewMembers(d, members); err != nil {
		t.Fatalf("update: %v", err)
	}
	var rkey string
	if err := d.QueryRow(`SELECT rkey FROM hold_crew_members WHERE hold_did=? AND member_did=?`,
		"did:web:hold1", "did:plc:alice").Scan(&rkey); err != nil {
		t.Fatalf("select: %v", err)
	}
	if rkey != "rkey2" {
		t.Errorf("rkey = %q, want rkey2", rkey)
	}
}

// TestBatchEmptySlices calls every batch function with a nil slice and
// expects no error from any of them.
func TestBatchEmptySlices(t *testing.T) {
	d := setupBatchTestDB(t)
	// Every batch function must tolerate an empty input slice without erroring.
	if _, err := BatchInsertManifests(d, nil); err != nil {
		t.Errorf("manifests: %v", err)
	}
	if err := BatchInsertLayers(d, nil); err != nil {
		t.Errorf("layers: %v", err)
	}
	if err := BatchInsertManifestReferences(d, nil); err != nil {
		t.Errorf("refs: %v", err)
	}
	if err := BatchUpsertTags(d, nil); err != nil {
		t.Errorf("tags: %v", err)
	}
	if err := BatchUpsertStars(d, nil); err != nil {
		t.Errorf("stars: %v", err)
	}
	if err := BatchUpsertRepoPages(d, nil); err != nil {
		t.Errorf("repo pages: %v", err)
	}
	if err := BatchUpsertDailyStats(d, nil); err != nil {
		t.Errorf("daily: %v", err)
	}
	if err := BatchUpsertRepositoryStats(d, nil); err != nil {
		t.Errorf("repo stats: %v", err)
	}
	if err := BatchUpsertCaptainRecords(d, nil); err != nil {
		t.Errorf("captain: %v", err)
	}
	if err := BatchUpsertCrewMembers(d, nil); err != nil {
		t.Errorf("crew: %v", err)
	}
	if err := BatchUpsertRepositoryAnnotations(d, nil); err != nil {
		t.Errorf("annotations: %v", err)
	}
}
+69
pkg/appview/db/conn.go
··· 1 + package db 2 + 3 + import ( 4 + "context" 5 + "database/sql" 6 + "database/sql/driver" 7 + "errors" 8 + "strings" 9 + ) 10 + 11 + // poisonedTxSubstrings are error-message substrings emitted when go-libsql or the 12 + // remote libsql server leaves a connection in a state that cannot safely be reused. 13 + // Most come from Bunny Database killing a transaction that exceeded its server-side 14 + // timeout; the follow-on COMMIT then sees the connection in a poisoned state. 15 + var poisonedTxSubstrings = []string{ 16 + "Transaction timed-out", 17 + "no transaction is active", 18 + "connection has reached an invalid state", 19 + "invalid state, started with", 20 + } 21 + 22 + // IsPoisonedTxErr reports whether err indicates the underlying connection is no 23 + // longer usable for further statements. Callers should evict the connection from 24 + // the pool when this returns true. 25 + func IsPoisonedTxErr(err error) bool { 26 + if err == nil { 27 + return false 28 + } 29 + msg := err.Error() 30 + for _, s := range poisonedTxSubstrings { 31 + if strings.Contains(msg, s) { 32 + return true 33 + } 34 + } 35 + return false 36 + } 37 + 38 + // ExecResilient borrows a dedicated connection from db, runs fn against it, and 39 + // evicts the connection from the pool when fn returns a poisoned-transaction 40 + // error. The connection is always released via Close. 41 + // 42 + // Poison eviction works by returning driver.ErrBadConn from within conn.Raw: 43 + // database/sql treats that as a signal to discard the underlying driver conn 44 + // rather than returning it to the idle pool. 45 + // 46 + // ExecResilient does NOT retry. Callers wrap the call in their own retry policy 47 + // when that is desired (for example, a single retry on the live Jetstream path). 
48 + func ExecResilient(ctx context.Context, db *sql.DB, fn func(*sql.Conn) error) error { 49 + conn, err := db.Conn(ctx) 50 + if err != nil { 51 + return err 52 + } 53 + defer conn.Close() 54 + 55 + execErr := fn(conn) 56 + if IsPoisonedTxErr(execErr) { 57 + // Discard the underlying driver conn so it never serves another caller. 58 + // The Raw callback's return value is what triggers eviction; we ignore 59 + // any error from Raw itself. 60 + _ = conn.Raw(func(any) error { return driver.ErrBadConn }) 61 + } 62 + return execErr 63 + } 64 + 65 + // ErrNoPoolConn is returned by ExecResilient when a connection cannot be 66 + // obtained from the pool (e.g. context cancelled). It wraps the underlying 67 + // pool error for callers that want to distinguish pool-exhaustion from 68 + // statement-level errors. 69 + var ErrNoPoolConn = errors.New("db: failed to acquire pool connection")
+28
pkg/appview/db/conn_test.go
··· 1 + package db 2 + 3 + import ( 4 + "errors" 5 + "testing" 6 + ) 7 + 8 + func TestIsPoisonedTxErr(t *testing.T) { 9 + cases := []struct { 10 + name string 11 + err error 12 + want bool 13 + }{ 14 + {"nil", nil, false}, 15 + {"unrelated", errors.New("disk full"), false}, 16 + {"bunny timeout", errors.New("Remote SQlite failure: `2:0:Transaction timed-out`"), true}, 17 + {"no active tx", errors.New("Remote SQlite failure: `3:1:cannot commit - no transaction is active`"), true}, 18 + {"init state", errors.New("error code = 2: Error executing statement: connection has reached an invalid state, started with Init"), true}, 19 + {"just invalid state", errors.New("generic failure: invalid state, started with Query"), true}, 20 + } 21 + for _, c := range cases { 22 + t.Run(c.name, func(t *testing.T) { 23 + if got := IsPoisonedTxErr(c.err); got != c.want { 24 + t.Errorf("IsPoisonedTxErr(%v) = %v, want %v", c.err, got, c.want) 25 + } 26 + }) 27 + } 28 + }
+33
pkg/appview/db/jetstream_cursor.go
··· 1 + package db 2 + 3 + import ( 4 + "database/sql" 5 + "errors" 6 + ) 7 + 8 + // GetJetstreamCursor returns the last persisted Jetstream cursor (time_us). 9 + // Returns 0 when no cursor has been saved yet (e.g. fresh database). 10 + func GetJetstreamCursor(db DBTX) (int64, error) { 11 + var cursor int64 12 + err := db.QueryRow(`SELECT cursor FROM jetstream_cursor WHERE id = 1`).Scan(&cursor) 13 + if errors.Is(err, sql.ErrNoRows) { 14 + return 0, nil 15 + } 16 + if err != nil { 17 + return 0, err 18 + } 19 + return cursor, nil 20 + } 21 + 22 + // SaveJetstreamCursor writes the given cursor to the singleton jetstream_cursor row. 23 + // Idempotent — safe to call on every tick. 24 + func SaveJetstreamCursor(db DBTX, cursor int64) error { 25 + _, err := db.Exec(` 26 + INSERT INTO jetstream_cursor (id, cursor, updated_at) 27 + VALUES (1, ?, CURRENT_TIMESTAMP) 28 + ON CONFLICT(id) DO UPDATE SET 29 + cursor = excluded.cursor, 30 + updated_at = excluded.updated_at 31 + `, cursor) 32 + return err 33 + }
+50
pkg/appview/db/jetstream_cursor_test.go
··· 1 + package db 2 + 3 + import ( 4 + "fmt" 5 + "strings" 6 + "testing" 7 + ) 8 + 9 + func TestJetstreamCursor_RoundTrip(t *testing.T) { 10 + safeName := strings.ReplaceAll(t.Name(), "/", "_") 11 + d, err := InitDB(fmt.Sprintf("file:%s?mode=memory&cache=shared", safeName), LibsqlConfig{}) 12 + if err != nil { 13 + t.Fatalf("init db: %v", err) 14 + } 15 + d.SetMaxOpenConns(1) 16 + defer d.Close() 17 + 18 + // Fresh DB: no persisted cursor. 19 + got, err := GetJetstreamCursor(d) 20 + if err != nil { 21 + t.Fatalf("get empty: %v", err) 22 + } 23 + if got != 0 { 24 + t.Errorf("initial cursor = %d, want 0", got) 25 + } 26 + 27 + // Save → read. 28 + if err := SaveJetstreamCursor(d, 1234567890); err != nil { 29 + t.Fatalf("save: %v", err) 30 + } 31 + got, err = GetJetstreamCursor(d) 32 + if err != nil { 33 + t.Fatalf("get after save: %v", err) 34 + } 35 + if got != 1234567890 { 36 + t.Errorf("cursor = %d, want 1234567890", got) 37 + } 38 + 39 + // Overwrite with newer value. 40 + if err := SaveJetstreamCursor(d, 9999999999); err != nil { 41 + t.Fatalf("save 2: %v", err) 42 + } 43 + got, err = GetJetstreamCursor(d) 44 + if err != nil { 45 + t.Fatalf("get 2: %v", err) 46 + } 47 + if got != 9999999999 { 48 + t.Errorf("cursor after overwrite = %d, want 9999999999", got) 49 + } 50 + }
+7
pkg/appview/db/migrations/0022_create_jetstream_cursor.yaml
# Migration 0022: creates the jetstream_cursor table.
#
# The table is a singleton: `id` is the primary key and the CHECK constraint
# only admits id = 1, so at most one row can ever exist. `cursor` holds the
# last persisted Jetstream cursor as an integer and `updated_at` defaults to
# the insertion time. CREATE TABLE IF NOT EXISTS makes the statement a no-op
# when the table is already present.
description: Persist Jetstream cursor so reconnects resume from last processed event
query: |
  CREATE TABLE IF NOT EXISTS jetstream_cursor (
    id INTEGER PRIMARY KEY CHECK (id = 1),
    cursor INTEGER NOT NULL,
    updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  );
+518
pkg/appview/jetstream/backfill_batch.go
··· 1 + package jetstream 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "fmt" 7 + "log/slog" 8 + "strings" 9 + "time" 10 + 11 + "atcr.io/pkg/appview/db" 12 + "atcr.io/pkg/atproto" 13 + ) 14 + 15 + // batchManifests decodes all manifest records for a repo and writes them as 16 + // a small set of multi-row INSERTs: one per table (manifests, layers, 17 + // manifest_references, repository_annotations). This replaces the previous 18 + // per-record chunked-transaction loop, which exceeded Bunny Database's 19 + // remote transaction timeout once chunks grew large. 20 + // 21 + // Returns the number of manifest records that were successfully decoded and 22 + // included in the batch. Decode/validation failures are logged and skipped. 23 + func (b *BackfillWorker) batchManifests(ctx context.Context, did string, records []atproto.Record) (int, error) { 24 + if len(records) == 0 { 25 + return 0, nil 26 + } 27 + 28 + type decoded struct { 29 + manifestRecord atproto.ManifestRecord 30 + manifest db.Manifest 31 + } 32 + 33 + decodedRecords := make([]decoded, 0, len(records)) 34 + for i := range records { 35 + r := &records[i] 36 + var mr atproto.ManifestRecord 37 + if err := json.Unmarshal(r.Value, &mr); err != nil { 38 + slog.Warn("Backfill skipping invalid manifest record", "uri", r.URI, "error", err) 39 + continue 40 + } 41 + if mr.Digest == "" || mr.Repository == "" { 42 + slog.Warn("Backfill skipping manifest with missing fields", "uri", r.URI) 43 + continue 44 + } 45 + 46 + // Resolve holdDID the same way the single-record path does. 
47 + holdDID := mr.HoldDID 48 + if holdDID == "" && mr.HoldEndpoint != "" { 49 + if resolved, err := atproto.ResolveHoldDID(ctx, mr.HoldEndpoint); err == nil { 50 + holdDID = resolved 51 + } 52 + } 53 + 54 + isList := len(mr.Manifests) > 0 55 + artifactType := "container-image" 56 + if !isList && mr.Config != nil { 57 + artifactType = db.GetArtifactType(mr.Config.MediaType) 58 + } 59 + 60 + m := db.Manifest{ 61 + DID: did, 62 + Repository: mr.Repository, 63 + Digest: mr.Digest, 64 + MediaType: mr.MediaType, 65 + SchemaVersion: mr.SchemaVersion, 66 + HoldEndpoint: holdDID, 67 + ArtifactType: artifactType, 68 + CreatedAt: mr.CreatedAt, 69 + } 70 + if !isList && mr.Config != nil { 71 + m.ConfigDigest = mr.Config.Digest 72 + m.ConfigSize = mr.Config.Size 73 + } 74 + if mr.Subject != nil { 75 + m.SubjectDigest = mr.Subject.Digest 76 + } 77 + decodedRecords = append(decodedRecords, decoded{mr, m}) 78 + } 79 + 80 + if len(decodedRecords) == 0 { 81 + return 0, nil 82 + } 83 + 84 + // Phase 1: upsert all manifests in one batch, fetch ids. 85 + manifests := make([]db.Manifest, len(decodedRecords)) 86 + for i, d := range decodedRecords { 87 + manifests[i] = d.manifest 88 + } 89 + ids, err := db.BatchInsertManifests(b.db, manifests) 90 + if err != nil { 91 + return 0, fmt.Errorf("batch insert manifests: %w", err) 92 + } 93 + 94 + // Phase 2: derive layers, references, and annotations using the returned ids. 95 + var ( 96 + layerRows []db.Layer 97 + refRows []db.ManifestReference 98 + ) 99 + 100 + // For annotations, we keep only the newest manifest per (did, repo) with a 101 + // non-empty annotation set. Matches reconcileAnnotations semantics at 102 + // backfill.go:573. 
103 + type newest struct { 104 + createdAt time.Time 105 + annotations map[string]string 106 + } 107 + newestByRepo := make(map[string]newest) 108 + 109 + for _, d := range decodedRecords { 110 + mid, ok := ids[db.ManifestKey(did, d.manifest.Repository, d.manifest.Digest)] 111 + if !ok { 112 + // BatchInsertManifests did not return an id for this row — either the 113 + // row was constraint-rejected or the SELECT missed it. Skip its 114 + // dependent rows rather than inserting with id 0. 115 + slog.Warn("Backfill manifest missing id after batch insert", 116 + "did", did, "repository", d.manifest.Repository, "digest", d.manifest.Digest) 117 + continue 118 + } 119 + 120 + if len(d.manifestRecord.Manifests) > 0 { 121 + for i, ref := range d.manifestRecord.Manifests { 122 + var pa, po, pv, pov string 123 + if ref.Platform != nil { 124 + pa = ref.Platform.Architecture 125 + po = ref.Platform.OS 126 + pv = ref.Platform.Variant 127 + pov = ref.Platform.OSVersion 128 + } 129 + isAttestation := false 130 + if refType, ok := ref.Annotations["vnd.docker.reference.type"]; ok { 131 + isAttestation = refType == "attestation-manifest" 132 + } 133 + refRows = append(refRows, db.ManifestReference{ 134 + ManifestID: mid, 135 + Digest: ref.Digest, 136 + MediaType: ref.MediaType, 137 + Size: ref.Size, 138 + PlatformArchitecture: pa, 139 + PlatformOS: po, 140 + PlatformVariant: pv, 141 + PlatformOSVersion: pov, 142 + IsAttestation: isAttestation, 143 + ReferenceIndex: i, 144 + }) 145 + } 146 + } else { 147 + for i, layer := range d.manifestRecord.Layers { 148 + layerRows = append(layerRows, db.Layer{ 149 + ManifestID: mid, 150 + Digest: layer.Digest, 151 + MediaType: layer.MediaType, 152 + Size: layer.Size, 153 + LayerIndex: i, 154 + Annotations: layer.Annotations, 155 + }) 156 + } 157 + } 158 + 159 + if hasNonEmpty(d.manifestRecord.Annotations) { 160 + key := d.manifest.Repository 161 + prev, ok := newestByRepo[key] 162 + if !ok || d.manifestRecord.CreatedAt.After(prev.createdAt) { 
163 + newestByRepo[key] = newest{d.manifestRecord.CreatedAt, d.manifestRecord.Annotations} 164 + } 165 + } 166 + } 167 + 168 + if err := db.BatchInsertLayers(b.db, layerRows); err != nil { 169 + return 0, err 170 + } 171 + if err := db.BatchInsertManifestReferences(b.db, refRows); err != nil { 172 + return 0, err 173 + } 174 + 175 + // Flatten annotations into AnnotationRows. 176 + var annotationRows []db.AnnotationRow 177 + for repo, n := range newestByRepo { 178 + for k, v := range n.annotations { 179 + if v == "" { 180 + continue 181 + } 182 + annotationRows = append(annotationRows, db.AnnotationRow{ 183 + DID: did, 184 + Repository: repo, 185 + Key: k, 186 + Value: v, 187 + }) 188 + } 189 + } 190 + if err := db.BatchUpsertRepositoryAnnotations(b.db, annotationRows); err != nil { 191 + return 0, err 192 + } 193 + 194 + slog.Info("Backfill batch manifests", 195 + "did", did, 196 + "manifests", len(manifests), 197 + "layers", len(layerRows), 198 + "references", len(refRows), 199 + "annotations", len(annotationRows)) 200 + 201 + return len(decodedRecords), nil 202 + } 203 + 204 + func hasNonEmpty(m map[string]string) bool { 205 + for _, v := range m { 206 + if v != "" { 207 + return true 208 + } 209 + } 210 + return false 211 + } 212 + 213 + // batchTags decodes tag records and writes them in one multi-row upsert. 
214 + func (b *BackfillWorker) batchTags(did string, records []atproto.Record) (int, error) { 215 + tags := make([]db.Tag, 0, len(records)) 216 + for i := range records { 217 + r := &records[i] 218 + var tr atproto.TagRecord 219 + if err := json.Unmarshal(r.Value, &tr); err != nil { 220 + slog.Warn("Backfill skipping invalid tag record", "uri", r.URI, "error", err) 221 + continue 222 + } 223 + digest, err := tr.GetManifestDigest() 224 + if err != nil { 225 + slog.Warn("Backfill skipping tag record without digest", "uri", r.URI, "error", err) 226 + continue 227 + } 228 + if tr.Repository == "" || tr.Tag == "" { 229 + continue 230 + } 231 + tags = append(tags, db.Tag{ 232 + DID: did, 233 + Repository: tr.Repository, 234 + Tag: tr.Tag, 235 + Digest: digest, 236 + CreatedAt: tr.UpdatedAt, 237 + }) 238 + } 239 + if err := db.BatchUpsertTags(b.db, tags); err != nil { 240 + return 0, err 241 + } 242 + slog.Info("Backfill batch tags", "did", did, "rows", len(tags)) 243 + return len(tags), nil 244 + } 245 + 246 + // batchStars decodes star records and writes them in one multi-row upsert. 247 + // Ensures star subject owners exist as users first (FK requirement). 
248 + func (b *BackfillWorker) batchStars(ctx context.Context, did string, records []atproto.Record) (int, error) { 249 + stars := make([]db.StarInput, 0, len(records)) 250 + ownerDIDs := make(map[string]struct{}) 251 + 252 + for i := range records { 253 + r := &records[i] 254 + var sr atproto.StarRecord 255 + if err := json.Unmarshal(r.Value, &sr); err != nil { 256 + slog.Warn("Backfill skipping invalid star record", "uri", r.URI, "error", err) 257 + continue 258 + } 259 + owner, repo, err := sr.GetSubjectDIDAndRepository() 260 + if err != nil { 261 + slog.Warn("Backfill skipping star with bad subject", "uri", r.URI, "error", err) 262 + continue 263 + } 264 + ownerDIDs[owner] = struct{}{} 265 + stars = append(stars, db.StarInput{ 266 + StarrerDID: did, 267 + OwnerDID: owner, 268 + Repository: repo, 269 + CreatedAt: sr.CreatedAt, 270 + }) 271 + } 272 + 273 + // Ensure every star subject has a users row (FK to users.did on stars). 274 + // These calls are idempotent and cached, so repeated owners cost nothing. 275 + for owner := range ownerDIDs { 276 + if err := b.processor.EnsureUserExists(ctx, owner); err != nil { 277 + slog.Warn("Backfill failed to ensure star subject user", "owner_did", owner, "error", err) 278 + } 279 + } 280 + 281 + if err := db.BatchUpsertStars(b.db, stars); err != nil { 282 + return 0, err 283 + } 284 + slog.Info("Backfill batch stars", "did", did, "rows", len(stars)) 285 + return len(stars), nil 286 + } 287 + 288 + // batchRepoPages decodes repo page records and writes them in one upsert. 
289 + func (b *BackfillWorker) batchRepoPages(did string, records []atproto.Record) (int, error) { 290 + pages := make([]db.RepoPage, 0, len(records)) 291 + for i := range records { 292 + r := &records[i] 293 + var pr atproto.RepoPageRecord 294 + if err := json.Unmarshal(r.Value, &pr); err != nil { 295 + slog.Warn("Backfill skipping invalid repo page", "uri", r.URI, "error", err) 296 + continue 297 + } 298 + if pr.Repository == "" { 299 + continue 300 + } 301 + avatarCID := "" 302 + if pr.Avatar != nil && pr.Avatar.Ref.Link != "" { 303 + avatarCID = pr.Avatar.Ref.Link 304 + } 305 + pages = append(pages, db.RepoPage{ 306 + DID: did, 307 + Repository: pr.Repository, 308 + Description: pr.Description, 309 + AvatarCID: avatarCID, 310 + UserEdited: pr.UserEdited, 311 + CreatedAt: pr.CreatedAt, 312 + UpdatedAt: pr.UpdatedAt, 313 + }) 314 + } 315 + if err := db.BatchUpsertRepoPages(b.db, pages); err != nil { 316 + return 0, err 317 + } 318 + slog.Info("Backfill batch repo pages", "did", did, "rows", len(pages)) 319 + return len(pages), nil 320 + } 321 + 322 + // batchDailyStats decodes daily stats records and writes them in one upsert. 323 + // Ensures every distinct owner exists as a user first (FK requirement). 
324 + func (b *BackfillWorker) batchDailyStats(ctx context.Context, holdDID string, records []atproto.Record) (int, error) { 325 + stats := make([]db.DailyStats, 0, len(records)) 326 + ownerDIDs := make(map[string]struct{}) 327 + 328 + for i := range records { 329 + r := &records[i] 330 + var dr atproto.DailyStatsRecord 331 + if err := json.Unmarshal(r.Value, &dr); err != nil { 332 + slog.Warn("Backfill skipping invalid daily stats", "uri", r.URI, "error", err) 333 + continue 334 + } 335 + if dr.OwnerDID == "" || dr.Repository == "" || dr.Date == "" { 336 + continue 337 + } 338 + ownerDIDs[dr.OwnerDID] = struct{}{} 339 + stats = append(stats, db.DailyStats{ 340 + DID: dr.OwnerDID, 341 + Repository: dr.Repository, 342 + Date: dr.Date, 343 + PullCount: int(dr.PullCount), 344 + PushCount: int(dr.PushCount), 345 + }) 346 + } 347 + 348 + for owner := range ownerDIDs { 349 + if err := b.processor.EnsureUserExists(ctx, owner); err != nil { 350 + slog.Warn("Backfill failed to ensure daily stats owner user", "owner_did", owner, "error", err) 351 + } 352 + } 353 + 354 + if err := db.BatchUpsertDailyStats(b.db, stats); err != nil { 355 + return 0, err 356 + } 357 + slog.Info("Backfill batch daily stats", "hold_did", holdDID, "rows", len(stats)) 358 + return len(stats), nil 359 + } 360 + 361 + // batchStats updates the in-memory stats cache from a hold's stats records, 362 + // then flushes the aggregated view of every touched (owner, repo) to the 363 + // repository_stats table in a single multi-row upsert. Aggregation is across 364 + // all holds known to the cache, preserving the single-record semantics. 
365 + func (b *BackfillWorker) batchStats(ctx context.Context, holdDID string, records []atproto.Record) (int, error) { 366 + type key struct{ owner, repo string } 367 + touched := make(map[key]struct{}) 368 + ownerDIDs := make(map[string]struct{}) 369 + 370 + for i := range records { 371 + r := &records[i] 372 + var sr atproto.StatsRecord 373 + if err := json.Unmarshal(r.Value, &sr); err != nil { 374 + slog.Warn("Backfill skipping invalid stats record", "uri", r.URI, "error", err) 375 + continue 376 + } 377 + if sr.OwnerDID == "" || sr.Repository == "" { 378 + continue 379 + } 380 + 381 + var lastPull, lastPush *time.Time 382 + if sr.LastPull != "" { 383 + if t, err := time.Parse(time.RFC3339, sr.LastPull); err == nil { 384 + lastPull = &t 385 + } 386 + } 387 + if sr.LastPush != "" { 388 + if t, err := time.Parse(time.RFC3339, sr.LastPush); err == nil { 389 + lastPush = &t 390 + } 391 + } 392 + 393 + b.processor.statsCache.Update(holdDID, sr.OwnerDID, sr.Repository, 394 + sr.PullCount, sr.PushCount, lastPull, lastPush) 395 + touched[key{sr.OwnerDID, sr.Repository}] = struct{}{} 396 + ownerDIDs[sr.OwnerDID] = struct{}{} 397 + } 398 + 399 + for owner := range ownerDIDs { 400 + if err := b.processor.EnsureUserExists(ctx, owner); err != nil { 401 + slog.Warn("Backfill failed to ensure stats owner user", "owner_did", owner, "error", err) 402 + } 403 + } 404 + 405 + // Build aggregated rows from the cache. 
406 + rows := make([]db.RepositoryStats, 0, len(touched)) 407 + for k := range touched { 408 + totalPull, totalPush, latestPull, latestPush := b.processor.statsCache.GetAggregated(k.owner, k.repo) 409 + rows = append(rows, db.RepositoryStats{ 410 + DID: k.owner, 411 + Repository: k.repo, 412 + PullCount: int(totalPull), 413 + PushCount: int(totalPush), 414 + LastPull: latestPull, 415 + LastPush: latestPush, 416 + }) 417 + } 418 + if err := db.BatchUpsertRepositoryStats(b.db, rows); err != nil { 419 + return 0, err 420 + } 421 + slog.Info("Backfill batch stats", "hold_did", holdDID, "rows", len(rows)) 422 + return len(rows), nil 423 + } 424 + 425 + // batchCaptains decodes captain records and writes them in one upsert. 426 + func (b *BackfillWorker) batchCaptains(holdDID string, records []atproto.Record) (int, error) { 427 + captains := make([]db.HoldCaptainRecord, 0, len(records)) 428 + now := time.Now() 429 + for i := range records { 430 + r := &records[i] 431 + var cr atproto.CaptainRecord 432 + if err := json.Unmarshal(r.Value, &cr); err != nil { 433 + slog.Warn("Backfill skipping invalid captain record", "uri", r.URI, "error", err) 434 + continue 435 + } 436 + if cr.Owner == "" || !strings.HasPrefix(cr.Owner, "did:") { 437 + slog.Warn("Backfill skipping captain with invalid owner", "uri", r.URI) 438 + continue 439 + } 440 + // Captain rkey is the hold DID (collections are stored on each hold's PDS, 441 + // so record.URI already encodes the hold DID in the authority segment). 
442 + recordHoldDID := extractDIDFromURI(r.URI) 443 + if recordHoldDID == "" { 444 + recordHoldDID = holdDID 445 + } 446 + captains = append(captains, db.HoldCaptainRecord{ 447 + HoldDID: recordHoldDID, 448 + OwnerDID: cr.Owner, 449 + Public: cr.Public, 450 + AllowAllCrew: cr.AllowAllCrew, 451 + DeployedAt: cr.DeployedAt, 452 + Region: cr.Region, 453 + Successor: cr.Successor, 454 + UpdatedAt: now, 455 + }) 456 + } 457 + if err := db.BatchUpsertCaptainRecords(b.db, captains); err != nil { 458 + return 0, err 459 + } 460 + slog.Info("Backfill batch captains", "rows", len(captains)) 461 + return len(captains), nil 462 + } 463 + 464 + // batchCrew decodes crew records and writes them in one upsert. 465 + func (b *BackfillWorker) batchCrew(holdDID string, records []atproto.Record) (int, error) { 466 + members := make([]db.CrewMember, 0, len(records)) 467 + for i := range records { 468 + r := &records[i] 469 + var cr atproto.CrewRecord 470 + if err := json.Unmarshal(r.Value, &cr); err != nil { 471 + slog.Warn("Backfill skipping invalid crew record", "uri", r.URI, "error", err) 472 + continue 473 + } 474 + if cr.Member == "" || !strings.HasPrefix(cr.Member, "did:") { 475 + slog.Warn("Backfill skipping crew with invalid member", "uri", r.URI) 476 + continue 477 + } 478 + recordHoldDID := extractDIDFromURI(r.URI) 479 + if recordHoldDID == "" { 480 + recordHoldDID = holdDID 481 + } 482 + permsJSON := "" 483 + if len(cr.Permissions) > 0 { 484 + if b, err := json.Marshal(cr.Permissions); err == nil { 485 + permsJSON = string(b) 486 + } 487 + } 488 + rkey := extractRkeyFromURI(r.URI) 489 + members = append(members, db.CrewMember{ 490 + HoldDID: recordHoldDID, 491 + MemberDID: cr.Member, 492 + Rkey: rkey, 493 + Role: cr.Role, 494 + Permissions: permsJSON, 495 + Tier: cr.Tier, 496 + AddedAt: cr.AddedAt, 497 + }) 498 + } 499 + if err := db.BatchUpsertCrewMembers(b.db, members); err != nil { 500 + return 0, err 501 + } 502 + slog.Info("Backfill batch crew", "hold_did", holdDID, 
"rows", len(members)) 503 + return len(members), nil 504 + } 505 + 506 + // extractDIDFromURI pulls the DID authority segment out of an AT-URI. 507 + // Format: at://did:…/collection/rkey → "did:…". 508 + func extractDIDFromURI(uri string) string { 509 + const prefix = "at://" 510 + if !strings.HasPrefix(uri, prefix) { 511 + return "" 512 + } 513 + rest := uri[len(prefix):] 514 + if slash := strings.IndexByte(rest, '/'); slash >= 0 { 515 + return rest[:slash] 516 + } 517 + return rest 518 + }
+2 -2
pkg/appview/src/css/main.css
··· 653 653 } 654 654 655 655 .sailor-typeahead-avatar { 656 - @apply flex-shrink-0 w-9 h-9 rounded-full overflow-hidden; 656 + @apply shrink-0 w-9 h-9 rounded-full overflow-hidden; 657 657 @apply bg-base-300; 658 658 } 659 659 ··· 686 686 } 687 687 688 688 .sailor-typeahead-selected .sailor-typeahead-clear { 689 - @apply flex-shrink-0 w-8 h-8 rounded-full; 689 + @apply shrink-0 w-8 h-8 rounded-full; 690 690 @apply flex items-center justify-center; 691 691 @apply text-xl leading-none text-base-content/60; 692 692 @apply hover:bg-base-300 hover:text-base-content;
+1 -1
pkg/appview/templates/components/repo-card.html
··· 43 43 </div> 44 44 </div> 45 45 {{ if .Description }} 46 - <p class="text-base-content/60 text-sm line-clamp-3 break-words m-0 my-4">{{ .Description }}</p> 46 + <p class="text-base-content/60 text-sm line-clamp-3 wrap-break-word m-0 my-4">{{ .Description }}</p> 47 47 {{ end }} 48 48 <div class="flex-1 flex flex-col justify-end py-2 min-w-0"> 49 49 {{ if eq .ArtifactType "helm-chart" }}