···11+package main
22+33+import (
44+ "context"
55+ "fmt"
66+ "strings"
77+88+ "atcr.io/pkg/atproto"
99+ "atcr.io/pkg/hold"
1010+1111+ "github.com/spf13/cobra"
1212+)
1313+
1414+// Media-type fragments that identify artifact types the scanner intentionally
1515+// skips. Keep this list in sync with scanner/internal/scan/worker.go's
1616+// unscannableConfigTypes — that map is keyed on config media types; here we look
1717+// at *layer* media types because the backfill walks the hold's layer index
1818+// (which has manifest AT-URIs we can join against scan records).
1919+//
2020+// Detection by layer media type is reliable: helm charts always have a single
2121+// layer with media type application/vnd.cncf.helm.chart.content.v1.tar+gzip;
2222+// in-toto / DSSE attestations use distinct layer types too.
2323+var unscannableLayerMediaSubstrings = []string{
2424+ "helm.chart.content",
2525+ "in-toto",
2626+ "dsse.envelope",
2727+}
2828+2929+var scanBackfillConfigFile string
3030+3131+var scanBackfillCmd = &cobra.Command{
3232+ Use: "scan-backfill",
3333+ Short: "Rewrite legacy scan records to use the status field",
3434+ Long: `Walks every io.atcr.hold.scan record on this hold and assigns a status
3535+("skipped" or "failed") to records that pre-date the status field.
3636+
3737+A legacy record is one with an empty status, no SBOM blob, and zero vulnerability
3838+counts. The tool inspects the layers of each record's manifest to decide:
3939+4040+ - layer media type matches helm/in-toto/DSSE → status="skipped"
4141+ - everything else → status="failed"
4242+4343+The tool is idempotent: records that already have a status are left alone.
4444+Run once per hold after upgrading.`,
4545+ Args: cobra.NoArgs,
4646+ RunE: func(cmd *cobra.Command, args []string) error {
4747+ cfg, err := hold.LoadConfig(scanBackfillConfigFile)
4848+ if err != nil {
4949+ return fmt.Errorf("load config: %w", err)
5050+ }
5151+5252+ ctx := context.Background()
5353+ holdPDS, cleanup, err := openHoldPDS(ctx, cfg)
5454+ if err != nil {
5555+ return err
5656+ }
5757+ defer cleanup()
5858+5959+ ri := holdPDS.RecordsIndex()
6060+ if ri == nil {
6161+ return fmt.Errorf("records index not available")
6262+ }
6363+6464+ const batchSize = 200
6565+ var (
6666+ cursor string
6767+ scanned int
6868+ rewritten int
6969+ markSkipped int
7070+ markFailed int
7171+ alreadyOK int
7272+ )
7373+7474+ for {
7575+ records, nextCursor, err := ri.ListRecords(atproto.ScanCollection, batchSize, cursor, true)
7676+ if err != nil {
7777+ return fmt.Errorf("list scan records: %w", err)
7878+ }
7979+8080+ for _, rec := range records {
8181+ scanned++
8282+ manifestDigest := "sha256:" + rec.Rkey
8383+8484+ _, scanRecord, err := holdPDS.GetScanRecord(ctx, manifestDigest)
8585+ if err != nil {
8686+ fmt.Fprintf(cmd.ErrOrStderr(), " skip rkey=%s: get failed: %v\n", rec.Rkey, err)
8787+ continue
8888+ }
8989+9090+ // Already classified — nothing to do.
9191+ if scanRecord.Status != "" {
9292+ alreadyOK++
9393+ continue
9494+ }
9595+9696+ // Only legacy records that signal failure (nil blob + zero
9797+ // counts) are candidates. Records with real data don't need
9898+ // rewriting; their absent status will be treated as "ok".
9999+ if scanRecord.SbomBlob != nil || scanRecord.Total != 0 {
100100+ alreadyOK++
101101+ continue
102102+ }
103103+104104+ // Determine artifact type from layer media types.
105105+ layers, err := holdPDS.ListLayerRecordsForManifest(ctx, scanRecord.Manifest)
106106+ if err != nil {
107107+ fmt.Fprintf(cmd.ErrOrStderr(), " skip rkey=%s: list layers failed: %v\n", rec.Rkey, err)
108108+ continue
109109+ }
110110+111111+ skipped := false
112112+ for _, l := range layers {
113113+ for _, frag := range unscannableLayerMediaSubstrings {
114114+ if strings.Contains(l.MediaType, frag) {
115115+ skipped = true
116116+ break
117117+ }
118118+ }
119119+ if skipped {
120120+ break
121121+ }
122122+ }
123123+124124+ var rewrite *atproto.ScanRecord
125125+ if skipped {
126126+ rewrite = atproto.NewSkippedScanRecord(
127127+ manifestDigest,
128128+ scanRecord.Repository,
129129+ scanRecord.UserDID,
130130+ "backfilled: unscannable artifact type",
131131+ scanRecord.ScannerVersion,
132132+ )
133133+ markSkipped++
134134+ } else {
135135+ rewrite = atproto.NewFailedScanRecord(
136136+ manifestDigest,
137137+ scanRecord.Repository,
138138+ scanRecord.UserDID,
139139+ "backfilled: legacy record (no SBOM and zero counts)",
140140+ scanRecord.ScannerVersion,
141141+ )
142142+ markFailed++
143143+ }
144144+ // Preserve the original ScannedAt — rewriting it would either
145145+ // reset the rescan timer or invalidate audit signals.
146146+ if scanRecord.ScannedAt != "" {
147147+ rewrite.ScannedAt = scanRecord.ScannedAt
148148+ }
149149+150150+ if _, _, err := holdPDS.CreateScanRecord(ctx, rewrite); err != nil {
151151+ fmt.Fprintf(cmd.ErrOrStderr(), " rewrite rkey=%s failed: %v\n", rec.Rkey, err)
152152+ continue
153153+ }
154154+ rewritten++
155155+ }
156156+157157+ if nextCursor == "" || len(records) == 0 {
158158+ break
159159+ }
160160+ cursor = nextCursor
161161+ }
162162+163163+ fmt.Fprintf(cmd.OutOrStdout(), "Backfill complete:\n")
164164+ fmt.Fprintf(cmd.OutOrStdout(), " scanned: %d\n", scanned)
165165+ fmt.Fprintf(cmd.OutOrStdout(), " already-tagged: %d\n", alreadyOK)
166166+ fmt.Fprintf(cmd.OutOrStdout(), " → skipped: %d\n", markSkipped)
167167+ fmt.Fprintf(cmd.OutOrStdout(), " → failed: %d\n", markFailed)
168168+ fmt.Fprintf(cmd.OutOrStdout(), " rewritten: %d\n", rewritten)
169169+ return nil
170170+ },
171171+}
172172+173173+func init() {
174174+ scanBackfillCmd.Flags().StringVarP(&scanBackfillConfigFile, "config", "c", "", "path to YAML configuration file")
175175+ rootCmd.AddCommand(scanBackfillCmd)
176176+}
+2
deploy/upcloud/configs/appview.yaml.tmpl
···4545legal:
4646 company_name: Seamark
4747 jurisdiction: State of Texas, United States
4848+ai:
4949+ api_key: ""
+46
docs/SBOM_SCANNING.md
···202202 - Record key: SBOM manifest digest
203203 - Contains reference to subject image
204204
205205+## Scan Record Status
206206+
207207+Every scan attempt produces an `io.atcr.hold.scan` record. The `status` field
208208+tells the appview how to render the result:
209209+
210210+| Status | Meaning | Stale-loop behavior |
211211+|-------------|--------------------------------------------------------------------------------------------------|--------------------------------------------------|
212212+| `ok` (or empty) | Scanner produced an SBOM. Vulnerability counts populated; SBOM blob populated. | Re-scanned on the rescan interval (default 7d). |
213213+| `failed` | Scanner ran but errored (network, OOM, parse failure). No SBOM, no counts. | Re-scanned on the rescan interval — failures may be transient. |
214214+| `skipped` | Scanner intentionally bypassed the artifact (helm chart, in-toto attestation, DSSE envelope). The `reason` field explains why. | **Never re-queued.** A skipped record won't change without a code change in the scanner. |
215215+
216216+Records written before the `status` field existed have an empty status. The
217217+appview treats empty + nil-blob + zero-count as failed (legacy fallback).
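
For illustration, that fallback mirrors the badge classification added in
`pkg/appview/handlers/scan_result.go`; a minimal sketch of the rule, using the
`atproto` types and constants introduced in this change (function name here is
illustrative, not the real one):

```go
// classify maps a scan record to a badge state. An empty status falls back to
// the legacy rule: nil SBOM blob plus zero counts means the scan failed.
func classify(rec *atproto.ScanRecord) (found, skipped, failed bool) {
	switch rec.Status {
	case atproto.ScanStatusSkipped:
		return false, true, false
	case atproto.ScanStatusFailed:
		return false, false, true
	case atproto.ScanStatusOK:
		return true, false, false
	default: // legacy record written before the status field existed
		if rec.SbomBlob == nil && rec.Total == 0 {
			return false, false, true
		}
		return true, false, false
	}
}
```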
218218+
219219+### Unscannable artifact types
220220+
221221+The scanner skips artifacts whose config media type appears in
222222+`unscannableConfigTypes` (`scanner/internal/scan/worker.go`). Currently:
223223+
224224+- `application/vnd.cncf.helm.config.v1+json` — Helm charts. Rendered with a
225225+  helm-aware digest page (`pkg/appview/handlers/digest.go`) that shows
226226+  Chart.yaml metadata instead of layers / vulns / SBOM.
227227+- `application/vnd.in-toto+json` — in-toto attestations.
228228+- `application/vnd.dsse.envelope.v1+json` — DSSE envelopes (SLSA provenance).
229229+
230230+For these types the appview's vuln/SBOM tabs render
231231+*"Vulnerability scanning isn't applied to this artifact type."* — no retry hint.
232232+
233233+To add a new unscannable type: append the media type to
234234+`unscannableConfigTypes`. Existing records won't auto-rewrite — run
235235+`atcr-hold scan-backfill` once to convert any pre-existing failure records
236236+into skipped records (see below).
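
For illustration, the map lives in `scanner/internal/scan/worker.go` and,
judging from its lookup in `processJob`, is a `map[string]bool` keyed by config
media type; the WASM entry below is hypothetical, not part of this change:

```go
// Config media types the scanner refuses to process. Membership means
// processJob returns a *SkipError and the hold records the scan as skipped.
var unscannableConfigTypes = map[string]bool{
	"application/vnd.cncf.helm.config.v1+json": true, // helm charts
	"application/vnd.in-toto+json":             true, // in-toto attestations
	"application/vnd.dsse.envelope.v1+json":    true, // DSSE envelopes (SLSA provenance)

	// Hypothetical new entry:
	"application/vnd.wasm.config.v1+json": true, // WASM modules
}
```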
237237+
238238+### Backfill tool
239239+
240240+`atcr-hold scan-backfill --config <path>` walks every scan record on the
241241+hold and rewrites legacy ones (empty status + nil blob + zero counts) using
242242+the manifest's layer media types as a signal:
243243+
244244+- Layer media type contains `helm.chart.content`, `in-toto`, or
245245+  `dsse.envelope` → `status="skipped"`.
246246+- Otherwise → `status="failed"`.
247247+
248248+The tool is idempotent and preserves the original `scannedAt`, so it can be
249249+re-run safely. Run once per hold after upgrading.
250250+
205251## Accessing SBOMs
206252
207253Multiple methods for discovering and retrieving SBOM data.
+11
lexicons/io/atcr/hold/scan.json
···6969 "type": "string",
7070 "format": "datetime",
7171 "description": "RFC3339 timestamp of when the scan completed"
7272+ },
7373+ "status": {
7474+ "type": "string",
7575+ "knownValues": ["ok", "failed", "skipped"],
7676+ "description": "Outcome of the scan attempt. 'ok' (or omitted, for back-compat) means the scanner produced an SBOM. 'failed' means the scanner ran but errored. 'skipped' means the scanner intentionally bypassed this artifact type (e.g. helm charts).",
7777+ "maxLength": 32
7878+ },
7979+ "reason": {
8080+ "type": "string",
8181+ "description": "Optional human-readable explanation for non-ok status (e.g. 'unscannable artifact type application/vnd.cncf.helm.config.v1+json').",
8282+ "maxLength": 256
7283 }
7384 }
7485 }
+13-4
pkg/appview/db/queries.go
···3030 OR hold_did IN (SELECT hold_did FROM hold_crew_members WHERE member_did = ?)
3131)`
32323333+// Artifact type values stored in manifests.artifact_type and returned by
3434+// GetArtifactType. Container-image is the default for OCI/Docker images and
3535+// manifest lists; helm-chart is for OCI helm chart artifacts.
3636+const (
3737+ ArtifactTypeContainerImage = "container-image"
3838+ ArtifactTypeHelmChart = "helm-chart"
3939+ ArtifactTypeUnknown = "unknown"
4040+)
4141+3342// GetArtifactType determines the artifact type based on config media type
3443// Returns: "helm-chart", "container-image", or "unknown"
3544func GetArtifactType(configMediaType string) string {
3645 switch {
3746 case strings.Contains(configMediaType, "helm.config"):
3838- return "helm-chart"
4747+ return ArtifactTypeHelmChart
3948 case strings.Contains(configMediaType, "oci.image.config") ||
4049 strings.Contains(configMediaType, "docker.container.image"):
4141- return "container-image"
5050+ return ArtifactTypeContainerImage
4251 case configMediaType == "":
4352 // Manifest lists don't have a config - treat as container-image
4444- return "container-image"
5353+ return ArtifactTypeContainerImage
4554 default:
4646- return "unknown"
5555+ return ArtifactTypeUnknown
4756 }
4857}
4958
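
A quick usage sketch (illustrative; the helm and OCI config media types are
the standard ones referenced elsewhere in this change, the last one is made up):

```go
db.GetArtifactType("application/vnd.cncf.helm.config.v1+json") // ArtifactTypeHelmChart
db.GetArtifactType("application/vnd.oci.image.config.v1+json") // ArtifactTypeContainerImage
db.GetArtifactType("")                                         // ArtifactTypeContainerImage (manifest list)
db.GetArtifactType("application/x-unrecognized")               // ArtifactTypeUnknown
```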
+74-1
pkg/appview/handlers/digest.go
···11package handlers
2233import (
44+ "context"
45 "log/slog"
56 "net/http"
67 "strings"
···2122 EmptyLayer bool // ENV, LABEL, etc. — no actual layer blob
2223}
23242525+// HelmChartContent is the data the helm-aware digest content needs: parsed
2626+// Chart.yaml metadata + a single chart-tarball "layer" pulled from the DB.
2727+type HelmChartContent struct {
2828+ Meta *holdclient.HelmChartMeta
2929+ Tarball *LayerDetail
3030+ MetaFetchFailed bool // hold reachable but config blob couldn't be parsed
3131+ HoldUnreachable bool
3232+}
3333+3434+// buildHelmContent fetches helm chart metadata + the single chart-tarball layer.
3535+// Returns a populated HelmChartContent even when the meta fetch fails so the
3636+// page can still render the artifact card.
3737+func buildHelmContent(ctx context.Context, holdURL string, digest string, dbLayers []db.Layer) *HelmChartContent {
3838+ content := &HelmChartContent{}
3939+ if holdURL == "" {
4040+ content.HoldUnreachable = true
4141+ } else {
4242+ meta, err := holdclient.FetchHelmChartMeta(ctx, holdURL, digest)
4343+ if err != nil {
4444+ slog.Warn("Failed to fetch helm chart meta", "error", err, "digest", digest)
4545+ content.MetaFetchFailed = true
4646+ } else {
4747+ content.Meta = meta
4848+ }
4949+ }
5050+ if len(dbLayers) > 0 {
5151+ // Helm charts are always single-layer (the chart tarball). If somehow
5252+ // multiple are present, pick the one with helm chart content media
5353+ // type, falling back to the first.
5454+ chosen := 0
5555+ for i, l := range dbLayers {
5656+ if strings.Contains(l.MediaType, "helm.chart.content") {
5757+ chosen = i
5858+ break
5959+ }
6060+ }
6161+ l := dbLayers[chosen]
6262+ content.Tarball = &LayerDetail{
6363+ Index: l.LayerIndex + 1,
6464+ Digest: l.Digest,
6565+ Size: l.Size,
6666+ MediaType: l.MediaType,
6767+ }
6868+ }
6969+ return content
7070+}
7171+2472// DigestDetailHandler renders the digest detail page with layers + vulnerabilities.
2573type DigestDetailHandler struct {
2674 BaseUIHandler
···66114 var layers []LayerDetail
67115 var vulnData *vulnDetailsData
68116 var sbomData *sbomDetailsData
117117+ var helmContent *HelmChartContent
6911870119 if manifest.IsManifestList {
71120 // Manifest list: no layers, show platform picker
72121 // Platforms are already populated by GetManifestDetail
122122+ } else if manifest.ArtifactType == db.ArtifactTypeHelmChart {
123123+ // Helm chart: skip OCI history / vuln / SBOM entirely. Fetch helm
124124+ // chart metadata from the same config blob and the single tarball
125125+ // layer from the DB.
126126+ dbLayers, err := db.GetLayersForManifest(h.ReadOnlyDB, manifest.ID)
127127+ if err != nil {
128128+ slog.Warn("Failed to fetch layers", "error", err)
129129+ }
130130+ hold, holdErr := ResolveHold(r.Context(), h.ReadOnlyDB, manifest.HoldEndpoint)
131131+ holdURL := ""
132132+ if holdErr == nil {
133133+ holdURL = hold.URL
134134+ }
135135+ helmContent = buildHelmContent(r.Context(), holdURL, digest, dbLayers)
136136+ if holdErr != nil {
137137+ helmContent.HoldUnreachable = true
138138+ }
73139 } else {
74140 // Single manifest: fetch layers from DB
75141 dbLayers, err := db.GetLayersForManifest(h.ReadOnlyDB, manifest.ID)
···124190 WithCanonical("https://" + h.SiteURL + "/d/" + owner.Handle + "/" + repository + "/" + digest).
125191 WithSiteName(h.ClientShortName)
126192193193+ pageData := NewPageData(r, &h.BaseUIHandler)
127194 data := struct {
128195 PageData
129196 Meta *PageMeta
···133200 Layers []LayerDetail
134201 VulnData *vulnDetailsData
135202 SbomData *sbomDetailsData
203203+ HelmContent *HelmChartContent
136204 SelectedPlatform string
205205+ RegistryURL string
206206+ OciClient string
137207 }{
138138- PageData: NewPageData(r, &h.BaseUIHandler),
208208+ PageData: pageData,
139209 Meta: meta,
140210 Owner: owner,
141211 Repository: repository,
···143213 Layers: layers,
144214 VulnData: vulnData,
145215 SbomData: sbomData,
216216+ HelmContent: helmContent,
146217 SelectedPlatform: selectedPlatform,
218218+ RegistryURL: h.RegistryURL,
219219+ OciClient: pageData.OciClient,
147220 }
148221149222 if err := h.Templates.ExecuteTemplate(w, "digest", data); err != nil {
+67-5
pkg/appview/handlers/digest_content.go
···11package handlers
2233import (
44+ "fmt"
45 "log/slog"
56 "net/http"
67 "strings"
···89910 "atcr.io/pkg/appview/db"
1011 "atcr.io/pkg/appview/holdclient"
1212+ "atcr.io/pkg/appview/middleware"
1113 "atcr.io/pkg/atproto"
1214 "github.com/go-chi/chi/v5"
1315)
···4951 hold, holdErr := ResolveHold(r.Context(), h.ReadOnlyDB, manifest.HoldEndpoint)
5052 holdReachable := holdErr == nil
51535454+ // Helm charts have no scannable layers / vulns / SBOM. Render helm-aware
5555+ // content for the default + "chart" sections, and a not-applicable
5656+ // placeholder for the legacy layers / vulns / sbom sections (which
5757+ // shouldn't be requested for helm but might be if a stale tab fires).
5858+ if manifest.ArtifactType == db.ArtifactTypeHelmChart {
5959+ holdURL := ""
6060+ if holdReachable {
6161+ holdURL = hold.URL
6262+ }
6363+ helm := buildHelmContent(r.Context(), holdURL, digest, dbLayers)
6464+ if !holdReachable {
6565+ helm.HoldUnreachable = true
6666+ }
6767+ helmData := struct {
6868+ Manifest *db.ManifestWithMetadata
6969+ HelmContent *HelmChartContent
7070+ RegistryURL string
7171+ OwnerHandle string
7272+ RepoName string
7373+ OciClient string
7474+ IsLoggedIn bool
7575+ }{
7676+ Manifest: manifest,
7777+ HelmContent: helm,
7878+ RegistryURL: h.RegistryURL,
7979+ OwnerHandle: identifier,
8080+ RepoName: repository,
8181+ OciClient: "", // helm switcher ignores this field
8282+ IsLoggedIn: middleware.GetUser(r) != nil,
8383+ }
8484+ w.Header().Set("Content-Type", "text/html")
8585+ section := r.URL.Query().Get("section")
8686+ switch section {
8787+ case "chart":
8888+ // Used by the repo page's chart tab — no install card here
8989+ // because repo-tag-section already renders one at the top.
9090+ if err := h.Templates.ExecuteTemplate(w, "helm-chart-info", helmData); err != nil {
9191+ slog.Warn("Failed to render helm chart info", "error", err)
9292+ RenderHTMXError(w, r, http.StatusInternalServerError, "Could not render helm chart", err)
9393+ }
9494+ case "layers", "vulns", "sbom":
9595+ // Defensive fallback if a stale tab somehow fires. The repo page
9696+ // hides these tabs for helm; this should be unreachable.
9797+ fmt.Fprint(w, `<p class="text-base-content/70 py-8">Helm charts don't have layers, vulnerabilities, or SBOMs.</p>`)
9898+ default:
9999+ // Digest detail page (full helm view, with install card).
100100+ if err := h.Templates.ExecuteTemplate(w, "helm-digest-content", helmData); err != nil {
101101+ slog.Warn("Failed to render helm digest content", "error", err)
102102+ RenderHTMXError(w, r, http.StatusInternalServerError, "Could not render helm chart", err)
103103+ }
104104+ }
105105+ return
106106+ }
107107+52108 // Parallelize the three hold fetches. They're independent and each
53109 // takes a network round-trip; serial runs add up on slow links.
54110 var (
···93149 }
9415095151 // VulnReason / SbomReason let the template branch distinctly on why
9696- // data is missing instead of collapsing three causes into a generic
9797- // "not available" message.
9898- // ok — data is present
152152+ // data is missing instead of collapsing causes into a generic message.
153153+ // ok — data is present
99154 // hold-unreachable — we couldn't reach the hold
100100- // not-scanned — hold is up but no scan record exists
101101- // fetch-failed — scan record fetch failed on the hold
155155+ // not-scanned — hold is up but no scan record exists
156156+ // not-applicable — scan record exists with status="skipped" (artifact
157157+ // type isn't scanned, e.g. in-toto, DSSE — helm
158158+ // charts go through a separate code path)
159159+ // fetch-failed — scan record fetch failed on the hold
102160 vulnReason := "ok"
103161 if !holdReachable {
104162 vulnReason = "hold-unreachable"
105163 } else if vulnData == nil || vulnData.Error == "never-scanned" {
106164 vulnReason = "not-scanned"
165165+ } else if vulnData.Status == atproto.ScanStatusSkipped {
166166+ vulnReason = "not-applicable"
107167 } else if vulnData.Error != "" {
108168 vulnReason = "fetch-failed"
109169 }
···113173 sbomReason = "hold-unreachable"
114174 } else if sbomData == nil || sbomData.Error == "never-scanned" {
115175 sbomReason = "not-scanned"
176176+ } else if sbomData.Status == atproto.ScanStatusSkipped {
177177+ sbomReason = "not-applicable"
116178 } else if sbomData.Error != "" {
117179 sbomReason = "fetch-failed"
118180 }
···3939 Packages []sbomPackage
4040 Total int
4141 Error string
4242+ Status string // scan record's status field (ok | failed | skipped); empty for legacy records
4343+ Reason string // scan record's reason field (only meaningful when Status != ok)
4244 ScannedAt string
4345 Digest string // image digest (for download URLs)
4446 HoldEndpoint string // hold DID (for download URLs)
···130132 return sbomDetailsData{Error: "Failed to parse scan record"}
131133 }
132134135135+ if scanRecord.Status == atproto.ScanStatusSkipped {
136136+ return sbomDetailsData{
137137+ Status: scanRecord.Status,
138138+ Reason: scanRecord.Reason,
139139+ ScannedAt: scanRecord.ScannedAt,
140140+ }
141141+ }
142142+133143 // Fetch the SBOM blob
134144 if scanRecord.SbomBlob == nil || scanRecord.SbomBlob.Ref.String() == "" {
135145 return sbomDetailsData{
136146 ScannedAt: scanRecord.ScannedAt,
147147+ Status: scanRecord.Status,
148148+ Reason: scanRecord.Reason,
137149 Error: "No SBOM data available",
138150 }
139151 }
···203215 return sbomDetailsData{
204216 Packages: packages,
205217 Total: len(packages),
218218+ Status: scanRecord.Status,
206219 ScannedAt: scanRecord.ScannedAt,
207220 Digest: digest,
208221 HoldEndpoint: holdEndpoint,
+37-13
pkg/appview/handlers/scan_result.go
···2525}
26262727// vulnBadgeData is the template data for the vuln-badge partial.
2828-// The badge renders one of four states, in priority order:
2828+// The badge renders one of five states, in priority order:
2929// 1. Error — we couldn't reach the hold at all (network/5xx)
3030// 2. NotScanned — hold reachable, no scan record for this digest (404)
3131-// 3. ScanFailed — scan record exists but the scanner didn't produce an SBOM
3232-// 4. Found — scan succeeded; render tier counts (or "Clean" when zero)
3131+// 3. Skipped — scan record explicitly marks this artifact as not-scannable
3232+// 4. ScanFailed — scan record exists but the scanner errored
3333+// 5. Found — scan succeeded; render tier counts (or "Clean" when zero)
3334//
3435// These states must stay distinct so users can tell "hold is down" from
3535-// "this hasn't been scanned yet" from "scanner errored on this image".
3636+// "this hasn't been scanned yet" from "scanner errored on this image" from
3737+// "this artifact type is intentionally not scanned".
3638type vulnBadgeData struct {
3739 Critical int64
3840 High int64
···4345 Found bool // true if scan record exists and succeeded
4446 Error bool // true if hold unreachable (network/5xx)
4547 NotScanned bool // true if hold is up but no scan record (404)
4646- ScanFailed bool // true if scan record exists but scan failed (no SBOM)
4848+ ScanFailed bool // true if scan record exists but scan failed
4949+ Skipped bool // true if scan record marks the artifact as intentionally not scanned (helm, in-toto, etc.)
4750 Digest string // for the detail modal link
4851 HoldEndpoint string // for the detail modal link
4952}
50535454+// classifyScanRecord maps a scan record's Status field to badge data flags.
5555+// An empty Status indicates a legacy record from before the status field
5656+// existed: a nil blob with zero counts is treated as failed (preserving the
5757+// prior badge for un-backfilled holds); anything else is treated as success.
5858+func classifyScanRecord(scanRecord *atproto.ScanRecord) (found, skipped, failed bool) {
5959+ switch scanRecord.Status {
6060+ case atproto.ScanStatusSkipped:
6161+ return false, true, false
6262+ case atproto.ScanStatusFailed:
6363+ return false, false, true
6464+ case atproto.ScanStatusOK:
6565+ return true, false, false
6666+ default:
6767+ // Legacy record (status field didn't exist when this was written).
6868+ if scanRecord.SbomBlob == nil && scanRecord.Total == 0 {
6969+ return false, false, true
7070+ }
7171+ return true, false, false
7272+ }
7373+}
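
A table-driven test sketch for this mapping (illustrative; assumes it sits in
the same package as the handler and that `atproto.ScanRecord`'s zero value has
a nil SBOM blob and zero counts):

```go
func TestClassifyScanRecord(t *testing.T) {
	cases := []struct {
		name                   string
		rec                    atproto.ScanRecord
		found, skipped, failed bool
	}{
		{"skipped", atproto.ScanRecord{Status: atproto.ScanStatusSkipped}, false, true, false},
		{"failed", atproto.ScanRecord{Status: atproto.ScanStatusFailed}, false, false, true},
		{"ok", atproto.ScanRecord{Status: atproto.ScanStatusOK}, true, false, false},
		{"legacy, no data", atproto.ScanRecord{}, false, false, true},
		{"legacy, has counts", atproto.ScanRecord{Total: 3}, true, false, false},
	}
	for _, c := range cases {
		found, skipped, failed := classifyScanRecord(&c.rec)
		if found != c.found || skipped != c.skipped || failed != c.failed {
			t.Errorf("%s: got found=%v skipped=%v failed=%v", c.name, found, skipped, failed)
		}
	}
}
```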
7474+5175func (h *ScanResultHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
5276 digest := r.URL.Query().Get("digest")
5377 holdEndpoint := r.URL.Query().Get("holdEndpoint")
···122146 return
123147 }
124148125125- // A failed scan has nil blobs (no SBOM generated) and zero counts.
126126- // Successful scans always have an SBOM blob even with 0 vulnerabilities.
127127- scanFailed := scanRecord.SbomBlob == nil && scanRecord.Total == 0
128128-149149+ found, skipped, failed := classifyScanRecord(&scanRecord)
129150 h.renderBadge(w, vulnBadgeData{
130151 Critical: scanRecord.Critical,
131152 High: scanRecord.High,
···133154 Low: scanRecord.Low,
134155 Total: scanRecord.Total,
135156 ScannedAt: scanRecord.ScannedAt,
136136- Found: true,
137137- ScanFailed: scanFailed,
157157+ Found: found,
158158+ Skipped: skipped,
159159+ ScanFailed: failed,
138160 Digest: digest,
139161 HoldEndpoint: holdDID,
140162 })
···189211 return vulnBadgeData{Error: true}
190212 }
191213214214+ found, skipped, failed := classifyScanRecord(&scanRecord)
192215 return vulnBadgeData{
193216 Critical: scanRecord.Critical,
194217 High: scanRecord.High,
···196219 Low: scanRecord.Low,
197220 Total: scanRecord.Total,
198221 ScannedAt: scanRecord.ScannedAt,
199199- Found: true,
200200- ScanFailed: scanRecord.SbomBlob == nil && scanRecord.Total == 0,
222222+ Found: found,
223223+ Skipped: skipped,
224224+ ScanFailed: failed,
201225 Digest: fullDigest,
202226 HoldEndpoint: holdDID,
203227 }
+22-4
pkg/appview/handlers/vuln_details.go
···5757 Matches []vulnMatch
5858 Summary vulnSummary
5959 Error string // non-empty if something went wrong
6060+ Status string // scan record's status field (ok | failed | skipped); empty for legacy records
6161+ Reason string // scan record's reason field (only meaningful when Status != ok)
6062 ScannedAt string
6163 Digest string // image digest (for download URLs)
6264 HoldEndpoint string // hold DID (for download URLs)
···317319 Total: scanRecord.Total,
318320 }
319321322322+ // Skipped scan records have no blobs and a non-failure status. The caller
323323+ // classifies these as "not-applicable" rather than "fetch-failed" so the
324324+ // template can show "scanning isn't applied to this artifact" instead of a
325325+ // retry hint.
326326+ if scanRecord.Status == atproto.ScanStatusSkipped {
327327+ return vulnDetailsData{
328328+ Summary: summary,
329329+ ScannedAt: scanRecord.ScannedAt,
330330+ Status: scanRecord.Status,
331331+ Reason: scanRecord.Reason,
332332+ }
333333+ }
334334+320335 // Fetch the vulnerability report blob
321336 if scanRecord.VulnReportBlob == nil || scanRecord.VulnReportBlob.Ref.String() == "" {
322337 return vulnDetailsData{
323338 Summary: summary,
324339 ScannedAt: scanRecord.ScannedAt,
340340+ Status: scanRecord.Status,
341341+ Reason: scanRecord.Reason,
325342 Error: "No detailed vulnerability report available. Only summary counts were recorded.",
326343 }
327344 }
···335352336353 blobReq, err := http.NewRequestWithContext(ctx, "GET", blobURL, nil)
337354 if err != nil {
338338- return vulnDetailsData{Summary: summary, ScannedAt: scanRecord.ScannedAt, Error: "Failed to build blob request"}
355355+ return vulnDetailsData{Summary: summary, ScannedAt: scanRecord.ScannedAt, Status: scanRecord.Status, Error: "Failed to build blob request"}
339356 }
340357341358 blobResp, err := http.DefaultClient.Do(blobReq)
342359 if err != nil {
343343- return vulnDetailsData{Summary: summary, ScannedAt: scanRecord.ScannedAt, Error: "Failed to fetch vulnerability report"}
360360+ return vulnDetailsData{Summary: summary, ScannedAt: scanRecord.ScannedAt, Status: scanRecord.Status, Error: "Failed to fetch vulnerability report"}
344361 }
345362 defer blobResp.Body.Close()
346363347364 if blobResp.StatusCode != http.StatusOK {
348348- return vulnDetailsData{Summary: summary, ScannedAt: scanRecord.ScannedAt, Error: "Vulnerability report not accessible"}
365365+ return vulnDetailsData{Summary: summary, ScannedAt: scanRecord.ScannedAt, Status: scanRecord.Status, Error: "Vulnerability report not accessible"}
349366 }
350367351368 var report grypeReport
352369 if err := json.NewDecoder(blobResp.Body).Decode(&report); err != nil {
353353- return vulnDetailsData{Summary: summary, ScannedAt: scanRecord.ScannedAt, Error: "Failed to parse vulnerability report"}
370370+ return vulnDetailsData{Summary: summary, ScannedAt: scanRecord.ScannedAt, Status: scanRecord.Status, Error: "Failed to parse vulnerability report"}
354371 }
355372356373 matches := make([]vulnMatch, 0, len(report.Matches))
···390407 return vulnDetailsData{
391408 Matches: matches,
392409 Summary: summary,
410410+ Status: scanRecord.Status,
393411 ScannedAt: scanRecord.ScannedAt,
394412 Digest: digest,
395413 HoldEndpoint: holdEndpoint,
+148
pkg/appview/holdclient/helm_config.go
···11+package holdclient
22+33+import (
44+ "context"
55+ "encoding/json"
66+ "fmt"
77+ "net/http"
88+ "net/url"
99+ "strings"
1010+ "time"
1111+1212+ "atcr.io/pkg/atproto"
1313+)
1414+1515+// HelmChartMeta is the parsed Chart.yaml-equivalent metadata extracted from a
1616+// helm chart's OCI config blob (media type
1717+// application/vnd.cncf.helm.config.v1+json). The helm config blob is Chart.yaml
1818+// rendered as JSON, so the field names mirror Chart.yaml.
1919+type HelmChartMeta struct {
2020+ Name string
2121+ Version string
2222+ AppVersion string
2323+ Type string // "application" | "library" (empty in older charts; treat as "application")
2424+ Description string
2525+ KubeVersion string
2626+ Home string
2727+ Icon string
2828+ Sources []string
2929+ Keywords []string
3030+ Maintainers []HelmMaintainer
3131+ Dependencies []HelmDependency
3232+ Annotations map[string]string
3333+ Deprecated bool
3434+}
3535+3636+type HelmMaintainer struct {
3737+ Name string
3838+ Email string
3939+ URL string
4040+}
4141+4242+type HelmDependency struct {
4343+ Name string
4444+ Version string
4545+ Repository string
4646+ Alias string
4747+ Condition string
4848+}
4949+5050+// helmConfigJSON matches the on-the-wire shape of a helm OCI config blob.
5151+// Field names follow Chart.yaml's JSON form: lowerCamelCase throughout, since
5252+// helm's CLI marshals the Chart.yaml struct directly (no kebab-case keys).
5353+type helmConfigJSON struct {
5454+ Name string `json:"name"`
5555+ Version string `json:"version"`
5656+ AppVersion string `json:"appVersion"`
5757+ Type string `json:"type"`
5858+ Description string `json:"description"`
5959+ KubeVersion string `json:"kubeVersion"`
6060+ Home string `json:"home"`
6161+ Icon string `json:"icon"`
6262+ Sources []string `json:"sources"`
6363+ Keywords []string `json:"keywords"`
6464+ Maintainers []helmMaintainerJSON `json:"maintainers"`
6565+ Dependencies []helmDependencyJSON `json:"dependencies"`
6666+ Annotations map[string]string `json:"annotations"`
6767+ Deprecated bool `json:"deprecated"`
6868+}
6969+7070+type helmMaintainerJSON struct {
7171+ Name string `json:"name"`
7272+ Email string `json:"email"`
7373+ URL string `json:"url"`
7474+}
7575+7676+type helmDependencyJSON struct {
7777+ Name string `json:"name"`
7878+ Version string `json:"version"`
7979+ Repository string `json:"repository"`
8080+ Alias string `json:"alias"`
8181+ Condition string `json:"condition"`
8282+}
8383+8484+// FetchHelmChartMeta fetches a helm chart's config blob from the hold and
8585+// parses it as Chart.yaml metadata. Uses the same getImageConfig XRPC as
8686+// FetchImageConfig but applies a helm-specific schema to the JSON.
8787+func FetchHelmChartMeta(ctx context.Context, holdURL, manifestDigest string) (*HelmChartMeta, error) {
8888+ reqURL := fmt.Sprintf("%s%s?digest=%s",
8989+ strings.TrimSuffix(holdURL, "/"),
9090+ atproto.HoldGetImageConfig,
9191+ url.QueryEscape(manifestDigest),
9292+ )
9393+9494+ ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
9595+ defer cancel()
9696+9797+ req, err := http.NewRequestWithContext(ctx, "GET", reqURL, nil)
9898+ if err != nil {
9999+ return nil, fmt.Errorf("build request: %w", err)
100100+ }
101101+102102+ resp, err := http.DefaultClient.Do(req)
103103+ if err != nil {
104104+ return nil, fmt.Errorf("fetch helm chart config: %w", err)
105105+ }
106106+ defer resp.Body.Close()
107107+108108+ if resp.StatusCode != http.StatusOK {
109109+ return nil, fmt.Errorf("hold returned status %d for %s", resp.StatusCode, reqURL)
110110+ }
111111+112112+ var record struct {
113113+ ConfigJSON string `json:"configJson"`
114114+ }
115115+ if err := json.NewDecoder(resp.Body).Decode(&record); err != nil {
116116+ return nil, fmt.Errorf("parse image config response: %w", err)
117117+ }
118118+119119+ var raw helmConfigJSON
120120+ if err := json.Unmarshal([]byte(record.ConfigJSON), &raw); err != nil {
121121+ return nil, fmt.Errorf("parse helm chart config JSON: %w", err)
122122+ }
123123+124124+ meta := &HelmChartMeta{
125125+ Name: raw.Name,
126126+ Version: raw.Version,
127127+ AppVersion: raw.AppVersion,
128128+ Type: raw.Type,
129129+ Description: raw.Description,
130130+ KubeVersion: raw.KubeVersion,
131131+ Home: raw.Home,
132132+ Icon: raw.Icon,
133133+ Sources: raw.Sources,
134134+ Keywords: raw.Keywords,
135135+ Annotations: raw.Annotations,
136136+ Deprecated: raw.Deprecated,
137137+ }
138138+ if meta.Type == "" {
139139+ meta.Type = "application"
140140+ }
141141+ for _, m := range raw.Maintainers {
142142+ meta.Maintainers = append(meta.Maintainers, HelmMaintainer(m))
143143+ }
144144+ for _, d := range raw.Dependencies {
145145+ meta.Dependencies = append(meta.Dependencies, HelmDependency(d))
146146+ }
147147+ return meta, nil
148148+}
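
A usage sketch (the hold URL and digest are placeholders; the appview's
`buildHelmContent` is the real caller in this change):

```go
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel()

meta, err := holdclient.FetchHelmChartMeta(ctx, "https://hold.example.com", "sha256:…")
if err != nil {
	// hold unreachable, non-200 response, or the config blob wasn't valid helm JSON
	return err
}
fmt.Printf("%s %s (app %s), type=%s, %d dependencies\n",
	meta.Name, meta.Version, meta.AppVersion, meta.Type, len(meta.Dependencies))
```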
···77 {{ icon "wifi-off" "size-4 shrink-0" }}
88 <span>We couldn't reach the hold to load the SBOM.</span>
99 </div>
1010+ {{ else if eq .SbomReason "not-applicable" }}
1111+ <p class="text-base-content/70">SBOMs aren't generated for this artifact type. ATCR only produces SBOMs for container images.</p>
1012 {{ else if eq .SbomReason "fetch-failed" }}
1113 <p class="text-base-content/70">SBOM data couldn't be loaded. Try refreshing in a minute.</p>
1214 {{ else }}
+6
pkg/appview/templates/partials/vuln-badge.html
···66</span>
77{{ else if .NotScanned }}
88<span class="badge badge-sm badge-ghost" title="No scan recorded yet">Not scanned</span>
99+{{ else if .Skipped }}
1010+{{/* Artifact is intentionally not scanned (helm chart, in-toto, DSSE).
1111+ Render an empty span — the artifact-type badge already tells the user
1212+ this isn't a container image, so a separate "not scannable" pill is
1313+ redundant noise on listings. */}}
1414+<span></span>
915{{ else if .ScanFailed }}
1016<span class="badge badge-sm badge-warning" title="Scanner ran but produced no SBOM">{{ icon "alert-triangle" "size-3" }} Scan failed</span>
1117{{ else if eq .Total 0 }}
+5
pkg/appview/templates/partials/vulns-section.html
···1010 <p class="text-sm">Scan data is stored on the hold. It may be offline or unreachable right now.</p>
1111 </div>
1212 </div>
1313+ {{ else if eq .VulnReason "not-applicable" }}
1414+ <div class="py-8 text-sm text-base-content/70 max-w-prose">
1515+ <p class="font-medium text-base-content">Vulnerability scanning isn't applied to this artifact type</p>
1616+ <p class="mt-1">ATCR's scanner only runs on container images. This artifact was intentionally skipped.</p>
1717+ </div>
1318 {{ else if eq .VulnReason "fetch-failed" }}
1419 <div class="py-8 text-sm text-base-content/70 max-w-prose">
1520 <p class="font-medium text-base-content">Scan data couldn't be loaded</p>
···6363 Duration time.Duration `json:"duration"`
6464}
65656666+// BackfillConfigCandidate identifies one manifest that's missing its image
6767+// config record on the hold.
6868+type BackfillConfigCandidate struct {
6969+ ManifestURI string `json:"manifestUri"`
7070+ UserDID string `json:"userDid"`
7171+ Digest string `json:"digest"`
7272+}
7373+7474+// BackfillConfigsPreview is the dry-run output for the image-config backfill.
7575+// No PDS writes or S3 fetches happen during preview — we only check which
7676+// manifest digests already have an io.atcr.hold.image.config record.
7777+type BackfillConfigsPreview struct {
7878+ Missing []BackfillConfigCandidate `json:"missing"`
7979+ MissingCount int `json:"missingCount"`
8080+ PresentCount int `json:"presentCount"`
8181+ ManifestsChecked int `json:"manifestsChecked"`
8282+ UsersAffected int `json:"usersAffected"`
8383+ Duration time.Duration `json:"duration"`
8484+}
8585+6686// GarbageCollector handles cleanup of orphaned blobs from storage
6787type GarbageCollector struct {
6888 pds *pds.HoldPDS
···80100 running bool
8110182102 // Last results (for admin panel display)
8383- lastPreview *GCPreview
8484- lastPreviewAt time.Time
8585- lastResult *GCResult
8686- lastResultAt time.Time
103103+ lastPreview *GCPreview
104104+ lastPreviewAt time.Time
105105+ lastResult *GCResult
106106+ lastResultAt time.Time
107107+ lastBackfillPreview *BackfillConfigsPreview
108108+ lastBackfillPreviewAt time.Time
8710988110 // Progress tracking for background operations
89111 phase string // "manifests", "records", "blobs", "deleting", "complete", "error"
···105127 OrphanedBlobs int64 `json:"orphaned_blobs"`
106128 ReferencedBlobs int64 `json:"referenced_blobs"`
107129 RecordsReconciled int64 `json:"records_reconciled"`
130130+ RecordsSkipped int64 `json:"records_skipped"`
108131 ManifestsChecked int64 `json:"manifests_checked"`
109132 UsersChecked int64 `json:"users_checked"`
110133 Duration time.Duration `json:"duration"`
···174197type GCProgress struct {
175198 Phase string // "manifests", "records", "blobs", "deleting", "complete", "error"
176199 Message string
177177- OperationType string // "preview", "run", "reconcile", "delete-records", "delete-blobs"
200200+ OperationType string // "preview", "run", "reconcile", "delete-records", "delete-blobs", "backfill-configs", "backfill-configs-preview"
178201 Running bool
179202 Error string
180203}
···815838 }
816839}
817840841841+// StartBackfillConfigsPreview launches a dry-run scan that classifies every
842842+// manifest URI referenced from layer records as either already having an
843843+// image config record or missing one. No PDS or S3 writes happen.
844844+func (gc *GarbageCollector) StartBackfillConfigsPreview() bool {
845845+ return gc.startBackground("backfill-configs-preview", "records", "Scanning for manifests missing image config records...", func(ctx context.Context) error {
846846+ _, err := gc.doBackfillConfigsPreview(ctx)
847847+ return err
848848+ })
849849+}
850850+818851// StartBackfillConfigs launches image config backfill in the background.
819852// Creates io.atcr.hold.image.config records for manifests that don't have one yet
820853// by fetching OCI config blobs from S3.
···825858 })
826859}
827860828828-// doBackfillConfigs creates image config records for manifests that are missing them.
829829-func (gc *GarbageCollector) doBackfillConfigs(ctx context.Context) (*GCResult, error) {
861861+// scanBackfillCandidates walks every layer record, dedupes the manifest URIs
862862+// they reference, and buckets each one as either already having an image
863863+// config record or missing one. Returns the missing candidates and a count of those present.
864864+//
865865+// opType is the GC operationType used for progress messages so this helper
866866+// can serve both the preview and the run.
867867+func (gc *GarbageCollector) scanBackfillCandidates(ctx context.Context, opType string) (missing []BackfillConfigCandidate, presentCount int, err error) {
830868 recordsIndex := gc.pds.RecordsIndex()
831869 if recordsIndex == nil {
832832- return nil, fmt.Errorf("records index not available")
870870+ return nil, 0, fmt.Errorf("records index not available")
833871 }
834872835835- // Step 1: Collect unique manifest URIs from layer records
836873 manifestURIs := make(map[string]bool)
837874 cursor := ""
838875 totalScanned := 0
839839-840876 for {
841841- records, nextCursor, err := recordsIndex.ListRecords(atproto.LayerCollection, 1000, cursor, true)
842842- if err != nil {
843843- return nil, fmt.Errorf("list layer records: %w", err)
877877+ records, nextCursor, listErr := recordsIndex.ListRecords(atproto.LayerCollection, 1000, cursor, true)
878878+ if listErr != nil {
879879+ return nil, 0, fmt.Errorf("list layer records: %w", listErr)
844880 }
845845-846881 for _, rec := range records {
847882 totalScanned++
848848- layer, err := gc.decodeLayerRecord(ctx, rec)
849849- if err != nil {
883883+ layer, decodeErr := gc.decodeLayerRecord(ctx, rec)
884884+ if decodeErr != nil {
850885 continue
851886 }
852887 manifestURIs[layer.Manifest] = true
853888 }
854854-855889 if nextCursor == "" {
856890 break
857891 }
···862896 "manifests", len(manifestURIs),
863897 "layersScanned", totalScanned)
864898865865- // Step 2: For each manifest, check if config record exists, create if not
866866- start := time.Now()
867867- result := &GCResult{}
868868- created := int64(0)
869869- skipped := int64(0)
870899 processed := 0
871871- httpClient := &http.Client{Timeout: 30 * time.Second}
872872-873900 for manifestURI := range manifestURIs {
874901 processed++
875902 gc.setProgress("records",
876876- fmt.Sprintf("Backfilling configs (%d/%d manifests)...", processed, len(manifestURIs)),
877877- "backfill-configs")
903903+ fmt.Sprintf("Checking image configs (%d/%d manifests)...", processed, len(manifestURIs)),
904904+ opType)
878905879879- aturi, err := syntax.ParseATURI(manifestURI)
880880- if err != nil {
881881- gc.logger.Warn("Invalid manifest URI", "uri", manifestURI, "error", err)
906906+ aturi, parseErr := syntax.ParseATURI(manifestURI)
907907+ if parseErr != nil {
908908+ gc.logger.Warn("Invalid manifest URI", "uri", manifestURI, "error", parseErr)
882909 continue
883910 }
884884-885911 manifestDigest := "sha256:" + aturi.RecordKey().String()
886912887887- // Check if config record already exists
888888- if _, _, err := gc.pds.GetImageConfigRecord(ctx, manifestDigest); err == nil {
889889- skipped++
913913+ if _, _, getErr := gc.pds.GetImageConfigRecord(ctx, manifestDigest); getErr == nil {
914914+ presentCount++
890915 continue
891916 }
917917+ missing = append(missing, BackfillConfigCandidate{
918918+ ManifestURI: manifestURI,
919919+ UserDID: aturi.Authority().String(),
920920+ Digest: manifestDigest,
921921+ })
922922+ }
923923+ return missing, presentCount, nil
924924+}
892925893893- userDID := aturi.Authority().String()
894894- manifestRkey := aturi.RecordKey().String()
926926+// doBackfillConfigsPreview runs scanBackfillCandidates and stores the result
927927+// for the admin UI to display. The full missing slice is kept in memory but
928928+// rendering is capped via maxPreviewItems in the template layer.
929929+func (gc *GarbageCollector) doBackfillConfigsPreview(ctx context.Context) (*BackfillConfigsPreview, error) {
930930+ start := time.Now()
931931+932932+ missing, presentCount, err := gc.scanBackfillCandidates(ctx, "backfill-configs-preview")
933933+ if err != nil {
934934+ return nil, err
935935+ }
936936+937937+ users := make(map[string]struct{}, len(missing))
938938+ for _, c := range missing {
939939+ users[c.UserDID] = struct{}{}
940940+ }
941941+942942+ missingCount := len(missing)
943943+ display := missing
944944+ if len(display) > maxPreviewItems {
945945+ display = display[:maxPreviewItems]
946946+ }
947947+948948+ preview := &BackfillConfigsPreview{
949949+ Missing: display,
950950+ MissingCount: missingCount,
951951+ PresentCount: presentCount,
952952+ ManifestsChecked: missingCount + presentCount,
953953+ UsersAffected: len(users),
954954+ Duration: time.Since(start),
955955+ }
956956+957957+ gc.mu.Lock()
958958+ gc.lastBackfillPreview = preview
959959+ gc.lastBackfillPreviewAt = time.Now()
960960+ gc.mu.Unlock()
961961+962962+ gc.logger.Info("Image config backfill preview complete",
963963+ "missing", missingCount,
964964+ "present", presentCount,
965965+ "usersAffected", preview.UsersAffected,
966966+ "duration", preview.Duration)
967967+ return preview, nil
968968+}
969969+970970+// doBackfillConfigs creates image config records for manifests that are missing them.
971971+func (gc *GarbageCollector) doBackfillConfigs(ctx context.Context) (*GCResult, error) {
972972+ start := time.Now()
973973+974974+ missing, presentCount, err := gc.scanBackfillCandidates(ctx, "backfill-configs")
975975+ if err != nil {
976976+ return nil, err
977977+ }
978978+979979+ result := &GCResult{RecordsSkipped: int64(presentCount)}
980980+ created := int64(0)
981981+ httpClient := &http.Client{Timeout: 30 * time.Second}
982982+983983+ for i, candidate := range missing {
984984+ gc.setProgress("records",
985985+ fmt.Sprintf("Backfilling configs (%d/%d missing)...", i+1, len(missing)),
986986+ "backfill-configs")
987987+988988+ userDID := candidate.UserDID
989989+ manifestRkey := strings.TrimPrefix(candidate.Digest, "sha256:")
990990+ manifestURI := candidate.ManifestURI
991991+ manifestDigest := candidate.Digest
895992896993 pdsEndpoint, err := atproto.ResolveDIDToPDS(ctx, userDID)
897994 if err != nil {
···899996 continue
900997 }
901998902902- // Fetch manifest via getRecord to get config digest
903999 reqURL := fmt.Sprintf("%s/xrpc/com.atproto.repo.getRecord?repo=%s&collection=%s&rkey=%s",
9041000 pdsEndpoint,
9051001 url.QueryEscape(userDID),
···9381034 continue
9391035 }
9401036941941- // Fetch config blob from S3
9421037 configBytes, err := gc.s3.GetBytes(ctx, s3.BlobPath(manifest.Config.Digest))
9431038 if err != nil {
9441039 gc.logger.Warn("Failed to fetch config blob", "digest", manifest.Config.Digest, "error", err)
9451040 continue
9461041 }
9471042948948- // Create image config record
9491043 configRecord := atproto.NewImageConfigRecord(manifestURI, string(configBytes))
9501044 if _, _, err := gc.pds.CreateImageConfigRecord(ctx, configRecord, manifestDigest); err != nil {
9511045 gc.logger.Warn("Failed to create image config record", "manifest", manifestURI, "error", err)
···9631057 gc.lastResultAt = time.Now()
9641058 gc.mu.Unlock()
9651059966966- gc.logger.Info("Image config backfill complete", "created", created, "skipped", skipped)
10601060+ gc.logger.Info("Image config backfill complete",
10611061+ "created", created,
10621062+ "skipped", result.RecordsSkipped)
9671063 return result, nil
9681064}
9691065···15081604 gc.mu.Lock()
15091605 defer gc.mu.Unlock()
15101606 return gc.lastResult, gc.lastResultAt
16071607+}
16081608+16091609+// LastBackfillPreview returns the most recent image-config backfill preview
16101610+// and when it was generated.
16111611+func (gc *GarbageCollector) LastBackfillPreview() (*BackfillConfigsPreview, time.Time) {
16121612+ gc.mu.Lock()
16131613+ defer gc.mu.Unlock()
16141614+ return gc.lastBackfillPreview, gc.lastBackfillPreviewAt
15111615}
1512161615131617// IsRunning returns whether a GC operation is currently in progress
+62-12
pkg/hold/pds/scan_broadcaster.go
···79798080// ScannerMessage is a message received from scanner over WebSocket
8181type ScannerMessage struct {
8282- Type string `json:"type"` // "ack", "result", "error"
8282+ Type string `json:"type"` // "ack", "result", "error", "skipped"
8383 Seq int64 `json:"seq"` // Job sequence number
8484 SBOM string `json:"sbom,omitempty"`
8585 VulnReport string `json:"vulnReport,omitempty"`
8686 Summary *VulnerabilitySummary `json:"summary,omitempty"`
8787 Error string `json:"error,omitempty"`
8888+ Reason string `json:"reason,omitempty"` // Populated for "skipped" messages
8889}
89909091// VulnerabilitySummary contains counts of vulnerabilities by severity
···447448 sb.handleResult(sub, msg)
448449 case "error":
449450 sb.handleError(sub, msg)
451451+ case "skipped":
452452+ sb.handleSkipped(sub, msg)
450453 default:
451454 slog.Warn("Unknown scanner message type",
452455 "type", msg.Type,
···579582 "total", msg.Summary.Total)
580583}
581584582582-// handleError marks a job as failed and creates a scan record so the proactive
583583-// scanner treats it as "stale" rather than "never scanned" (avoids retry loops).
585585+// handleError marks a job as failed and creates a scan record so the stale
586586+// loop won't immediately retry. Failed records still get retried on the
587587+// rescan interval since failures may be transient (network, OOM, etc.).
584588func (sb *ScanBroadcaster) handleError(sub *ScanSubscriber, msg ScannerMessage) {
585589 ctx := context.Background()
586590587587- // Get job details to create failure scan record
588591 var manifestDigest, repository, userDID string
589592 err := sb.db.QueryRow(`
590593 SELECT manifest_digest, repository, user_did
···594597 slog.Error("Failed to get job details for failure record",
595598 "seq", msg.Seq, "error", err)
596599 } else {
597597- // Create a scan record with zero counts and nil blobs — marks it as
598598- // "scanned" so the proactive scheduler won't retry until rescan interval.
599599- // Nil blobs signal failure to the appview (successful scans always have blobs).
600600- scanRecord := atproto.NewScanRecord(
600600+ scanRecord := atproto.NewFailedScanRecord(
601601 manifestDigest, repository, userDID,
602602- nil, nil, // no SBOM or vuln report — signals scan failure
603603- 0, 0, 0, 0, 0,
602602+ msg.Error,
604603 "atcr-scanner-v1.0.0",
605604 )
606605 if _, _, err := sb.pds.CreateScanRecord(ctx, scanRecord); err != nil {
···609608 }
610609 }
611610612612- // Mark job as failed
613611 _, err = sb.db.Exec(`
614612 UPDATE scan_jobs SET status = 'failed', completed_at = ?
615613 WHERE seq = ?
···620618 "error", err)
621619 }
622620623623- // Remove from in-flight tracking and wake dispatch loop
624621 sb.removeInflight(manifestDigest)
625622 sb.signalCompletion()
626623···630627 "error", msg.Error)
631628}
632629630630+// handleSkipped marks a job complete and creates a scan record with
631631+// status="skipped". The stale-scan loop will leave these records alone — the
632632+// outcome won't change until the scanner gains support for the artifact type.
633633+func (sb *ScanBroadcaster) handleSkipped(sub *ScanSubscriber, msg ScannerMessage) {
634634+ ctx := context.Background()
635635+636636+ var manifestDigest, repository, userDID string
637637+ err := sb.db.QueryRow(`
638638+ SELECT manifest_digest, repository, user_did
639639+ FROM scan_jobs WHERE seq = ?
640640+ `, msg.Seq).Scan(&manifestDigest, &repository, &userDID)
641641+ if err != nil {
642642+ slog.Error("Failed to get job details for skip record",
643643+ "seq", msg.Seq, "error", err)
644644+ } else {
645645+ scanRecord := atproto.NewSkippedScanRecord(
646646+ manifestDigest, repository, userDID,
647647+ msg.Reason,
648648+ "atcr-scanner-v1.0.0",
649649+ )
650650+ if _, _, err := sb.pds.CreateScanRecord(ctx, scanRecord); err != nil {
651651+ slog.Error("Failed to store skipped scan record",
652652+ "seq", msg.Seq, "error", err)
653653+ }
654654+ }
655655+656656+ _, err = sb.db.Exec(`
657657+ UPDATE scan_jobs SET status = 'completed', completed_at = ?
658658+ WHERE seq = ?
659659+ `, time.Now(), msg.Seq)
660660+ if err != nil {
661661+ slog.Error("Failed to mark scan job as completed (skipped)",
662662+ "seq", msg.Seq,
663663+ "error", err)
664664+ }
665665+666666+ sb.removeInflight(manifestDigest)
667667+ sb.signalCompletion()
668668+669669+ slog.Info("Scan job skipped",
670670+ "seq", msg.Seq,
671671+ "subscriberId", sub.id,
672672+ "reason", msg.Reason)
673673+}
674674+633675// drainPendingJobs sends pending/timed-out jobs to a newly connected scanner.
634676// Collects all pending rows first, closes cursor, then assigns and dispatches
635677// to avoid holding a SELECT cursor open during UPDATEs (prevents SQLite BUSY).
···10471089 // Fetch the actual scan record to check staleness
10481090 _, scanRecord, err := sb.pds.GetScanRecord(ctx, manifestDigest)
10491091 if err != nil {
10921092+ sb.removeInflight(manifestDigest)
10931093+ continue
10941094+ }
10951095+10961096+ // Permanently-skipped records (helm charts, in-toto, etc.) won't
10971097+ // change outcome on retry — leave them alone. Failed records still
10981098+ // get retried since failures may be transient.
10991099+ if scanRecord.Status == atproto.ScanStatusSkipped {
10501100 sb.removeInflight(manifestDigest)
10511101 continue
10521102 }
+7
scanner/internal/client/hold.go
···194194 c.sendJSON(scanner.ErrorMessage{Type: "error", Seq: seq, Error: errMsg})
195195}
196196197197+// SendSkipped sends a skipped message for an artifact the scanner intentionally
198198+// won't process (e.g., helm charts). Distinct from SendError so the hold can
199199+// distinguish a permanent skip from a retryable failure.
200200+func (c *HoldClient) SendSkipped(seq int64, reason string) {
201201+ c.sendJSON(scanner.SkippedMessage{Type: "skipped", Seq: seq, Reason: reason})
202202+}
203203+197204func (c *HoldClient) sendJSON(v any) {
198205 c.mu.Lock()
199206 defer c.mu.Unlock()
+30-11
scanner/internal/scan/worker.go
···4455import (
66 "context"
77+ "errors"
78 "fmt"
89 "log/slog"
910 "os"
1011 "runtime"
1111- "strings"
1212 "sync"
1313 "time"
1414···1717 "atcr.io/scanner/internal/config"
1818 "atcr.io/scanner/internal/queue"
1919)
2020+2121+// SkipError is returned by processJob when the scanner intentionally bypasses
2222+// an artifact type it can't analyze (helm charts, in-toto attestations, DSSE).
2323+// The worker dispatches these to hold via SendSkipped so the hold can mark
2424+// the scan record "skipped" instead of "failed". Skipped records are never
2525+// retried by the stale-scan loop; failures are.
2626+type SkipError struct {
2727+ Reason string
2828+}
2929+3030+func (e *SkipError) Error() string { return "skipped: " + e.Reason }
20312132// WorkerPool manages a pool of scan workers
2233type WorkerPool struct {
···9410595106 result, err := wp.processJob(ctx, job)
96107 if err != nil {
9797- logLevel := slog.LevelError
9898- if strings.HasPrefix(err.Error(), "skipped:") {
9999- logLevel = slog.LevelInfo
108108+ var skipErr *SkipError
109109+ if errors.As(err, &skipErr) {
110110+ slog.Info("Scan job skipped",
111111+ "worker_id", id,
112112+ "repository", job.Repository,
113113+ "reason", skipErr.Reason)
114114+ wp.client.SendSkipped(job.Seq, skipErr.Reason)
115115+ } else {
116116+ slog.Error("Scan job failed",
117117+ "worker_id", id,
118118+ "repository", job.Repository,
119119+ "error", err)
120120+ wp.client.SendError(job.Seq, err.Error())
100121 }
101101- slog.Log(ctx, logLevel, "Scan job failed",
102102- "worker_id", id,
103103- "repository", job.Repository,
104104- "error", err)
105105- wp.client.SendError(job.Seq, err.Error())
106122 } else {
107123 wp.client.SendResult(job.Seq, result)
108124···138154func (wp *WorkerPool) processJob(ctx context.Context, job *scanner.ScanJob) (*scanner.ScanResult, error) {
139155 startTime := time.Now()
140156141141- // Skip non-container OCI artifacts (Helm charts, WASM modules, etc.)
157157+ // Skip non-container OCI artifacts (Helm charts, in-toto, DSSE, etc.).
158158+ // Returning *SkipError tells the worker dispatch loop to send a "skipped"
159159+ // message rather than an "error" — the hold marks these records as
160160+ // permanently skipped and won't retry them on the rescan interval.
142161 if unscannableConfigTypes[job.Config.MediaType] {
143143- return nil, fmt.Errorf("skipped: unscannable artifact type %s", job.Config.MediaType)
162162+ return nil, &SkipError{Reason: fmt.Sprintf("unscannable artifact type %s", job.Config.MediaType)}
144163 }
145164146165 // Ensure tmp dir exists
+11
scanner/types.go
···8282 Seq int64 `json:"seq"`
8383 Error string `json:"error"`
8484}
8585+8686+// SkippedMessage is sent from scanner to hold when an artifact is intentionally
8787+// not scanned (e.g., helm charts, in-toto attestations). Distinct from
8888+// ErrorMessage so the hold can mark the scan record as "skipped" rather than
8989+// "failed" — the stale-scan loop will leave skipped records alone since the
9090+// outcome won't change without a code change in the scanner.
9191+type SkippedMessage struct {
9292+ Type string `json:"type"` // "skipped"
9393+ Seq int64 `json:"seq"`
9494+ Reason string `json:"reason"`
9595+}
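
A minimal sketch of the wire form, assuming standard `encoding/json`
marshalling of the struct above (the reason string follows the format the
worker emits for unscannable config types):

```go
msg := scanner.SkippedMessage{
	Type:   "skipped",
	Seq:    42,
	Reason: "unscannable artifact type application/vnd.cncf.helm.config.v1+json",
}
b, _ := json.Marshal(msg)
fmt.Println(string(b))
// {"type":"skipped","seq":42,"reason":"unscannable artifact type application/vnd.cncf.helm.config.v1+json"}
```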