···13131414WORKDIR /workspace
15151616-# Copy core dependency (from replace directive in go.mod)
1717-COPY core/ core/
1818-1919-# Copy loom go mod files and download deps
2020-COPY loom/go.mod loom/go.sum loom/
2121-WORKDIR /workspace/loom
1616+# Copy go mod files and download deps
1717+COPY go.mod go.sum ./
2218RUN go mod download
23192424-# Copy loom source code
2525-COPY loom/api/ api/
2626-COPY loom/cmd/ cmd/
2727-COPY loom/internal/ internal/
2020+# Copy source code
2121+COPY api/ api/
2222+COPY cmd/ cmd/
2323+COPY internal/ internal/
28242925# Build runner (static, no CGO)
3026# Use -s -w to strip debug symbols and reduce binary size
···41374238# Unified image with both binaries
4339FROM gcr.io/distroless/base-debian13:nonroot
4444-COPY --from=builder /workspace/loom/manager /manager
4545-COPY --from=builder /workspace/loom/loom-runner /loom-runner
4040+COPY --from=builder /workspace/manager /manager
4141+COPY --from=builder /workspace/loom-runner /loom-runner
46424743LABEL org.opencontainers.image.title="Loom" \
4844 org.opencontainers.image.description="Kubernetes Operator for Tangled Spindles " \
+3-3
Makefile
···171171# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
172172.PHONY: docker-build
173173docker-build: setup-buildx ## Build and push multi-arch docker image.
174174- cd .. && $(CONTAINER_TOOL) buildx build \
174174+ $(CONTAINER_TOOL) buildx build \
175175 --builder loom-builder \
176176 --platform=linux/amd64,linux/arm64 \
177177 --push \
178178 --tag ${IMG} \
179179- -f loom/Dockerfile .
179179+ -f Dockerfile .
180180181181.PHONY: docker-build-local
182182docker-build-local: ## Build docker image for local arch only (no push).
183183- cd .. && $(CONTAINER_TOOL) build -f loom/Dockerfile -t ${IMG} .
183183+ $(CONTAINER_TOOL) build -t ${IMG} .
184184185185.PHONY: setup-buildx
186186setup-buildx: ## Set up buildx builder with credential access for multi-arch builds
+6
api/v1alpha1/spindleset_types.go
···214214 // +optional
215215 Affinity *corev1.Affinity `json:"affinity,omitempty"`
216216217217+ // ImagePullSecrets is a list of secret names for pulling container images.
218218+ // Specified directly on the pod spec to avoid kubelet races when resolving
219219+ // secrets from the service account.
220220+ // +optional
221221+ ImagePullSecrets []string `json:"imagePullSecrets,omitempty"`
222222+217223 // RegistryCredentialsSecret is the name of a kubernetes.io/dockerconfigjson secret
218224 // containing registry credentials for buildah to use when pushing images.
219225 // If specified, the secret is mounted at /home/user/.docker/config.json.
+5
api/v1alpha1/zz_generated.deepcopy.go
···264264 *out = new(v1.Affinity)
265265 (*in).DeepCopyInto(*out)
266266 }
267267+ if in.ImagePullSecrets != nil {
268268+ in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
269269+ *out = make([]string, len(*in))
270270+ copy(*out, *in)
271271+ }
267272}
268273269274// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SpindleTemplate.
···110110}
111111112112func run() error {
113113- // Read workflow spec from environment
114114- workflowJSON := os.Getenv("LOOM_WORKFLOW_SPEC")
113113+ // Read workflow spec from file (preferred) or environment variable (fallback).
114114+ // File-based reading keeps the Job object small in etcd.
115115+ var workflowJSON string
116116+ if specPath := os.Getenv("LOOM_WORKFLOW_SPEC_PATH"); specPath != "" {
117117+ data, err := os.ReadFile(specPath)
118118+ if err != nil {
119119+ return fmt.Errorf("failed to read workflow spec from %s: %w", specPath, err)
120120+ }
121121+ workflowJSON = string(data)
122122+ } else {
123123+ workflowJSON = os.Getenv("LOOM_WORKFLOW_SPEC")
124124+ }
115125 if workflowJSON == "" {
116116- return fmt.Errorf("LOOM_WORKFLOW_SPEC environment variable not set")
126126+ return fmt.Errorf("workflow spec not provided: set LOOM_WORKFLOW_SPEC_PATH or LOOM_WORKFLOW_SPEC")
117127 }
118128119129 var workflow loomv1alpha1.WorkflowSpec
+8
config/crd/bases/loom.j5t.io_spindlesets.yaml
···11921192 x-kubernetes-list-type: atomic
11931193 type: object
11941194 type: object
11951195+ imagePullSecrets:
11961196+ description: |-
11971197+ ImagePullSecrets is a list of secret names for pulling container images.
11981198+ Specified directly on the pod spec to avoid kubelet races when resolving
11991199+ secrets from the service account.
12001200+ items:
12011201+ type: string
12021202+ type: array
11951203 registryCredentialsSecret:
11961204 description: |-
11971205 RegistryCredentialsSecret is the name of a kubernetes.io/dockerconfigjson secret
···12121313 # Template for spindle job pods
1414 template:
1515+ # imagePullSecrets specified directly on job pod specs to avoid
1616+ # kubelet races when resolving secrets from the service account
1717+ imagePullSecrets:
1818+ {{- range .Values.imagePullSecrets }}
1919+ - {{ .name }}
2020+ {{- end }}
1521 # Resource profiles are matched against workflow architecture and node labels.
1622 # The first profile matching the workflow's architecture is selected.
1723 # Profile's nodeSelector and resources are applied to the job pod.
+51
internal/controller/spindleset_controller.go
···18181919import (
2020 "context"
2121+ "encoding/json"
2122 "fmt"
2223 "strings"
2324 "sync"
···7576// +kubebuilder:rbac:groups="",resources=nodes,verbs=list;watch
7677// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch
7778// +kubebuilder:rbac:groups="",resources=pods/log,verbs=get
7979+// +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete
7880// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update;patch;delete
7981// +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete
8082···530532 return fmt.Errorf("failed to check for existing job: %w", err)
531533 }
532534535535+ // Create a ConfigMap with the workflow spec JSON to keep the Job object small in etcd.
536536+ // The runner reads the spec from a mounted file instead of an env var.
537537+ workflowSpecJSON, err := json.Marshal(workflowSpec)
538538+ if err != nil {
539539+ return fmt.Errorf("failed to marshal workflow spec for %s: %w", workflowSpec.Name, err)
540540+ }
541541+542542+ configMapName := jobName + "-spec"
543543+ if len(configMapName) > 63 {
544544+ configMapName = configMapName[:63]
545545+ }
546546+547547+ existingCM := &corev1.ConfigMap{}
548548+ err = r.Get(ctx, client.ObjectKey{
549549+ Name: configMapName,
550550+ Namespace: spindleSet.Namespace,
551551+ }, existingCM)
552552+553553+ if err != nil {
554554+ if apierrors.IsNotFound(err) {
555555+ cm := &corev1.ConfigMap{
556556+ ObjectMeta: metav1.ObjectMeta{
557557+ Name: configMapName,
558558+ Namespace: spindleSet.Namespace,
559559+ Labels: map[string]string{
560560+ "loom.j5t.io/spindleset": spindleSet.Name,
561561+ "loom.j5t.io/pipeline-id": pipelineRun.PipelineID,
562562+ "loom.j5t.io/workflow": workflowSpec.Name,
563563+ },
564564+ },
565565+ Data: map[string]string{
566566+ "workflow-spec.json": string(workflowSpecJSON),
567567+ },
568568+ }
569569+ if err := controllerutil.SetControllerReference(spindleSet, cm, r.Scheme); err != nil {
570570+ return fmt.Errorf("failed to set controller reference on configmap: %w", err)
571571+ }
572572+ logger.Info("Creating ConfigMap for workflow spec", "configmap", configMapName)
573573+ if err := r.retryCreate(ctx, cm); err != nil {
574574+ if !apierrors.IsAlreadyExists(err) {
575575+ return fmt.Errorf("failed to create configmap for workflow %s: %w", workflowSpec.Name, err)
576576+ }
577577+ }
578578+ } else {
579579+ return fmt.Errorf("failed to check for existing configmap: %w", err)
580580+ }
581581+ }
582582+533583 // Convert workflow steps to jobbuilder format
534584 jobSteps := make([]jobbuilder.WorkflowStep, 0, len(workflowSpec.Steps))
535585 for _, step := range workflowSpec.Steps {
···554604 SkipClone: pipelineRun.SkipClone,
555605 SecretName: secretName,
556606 SecretKeys: secretKeys,
607607+ ConfigMapName: configMapName,
557608 Template: spindleSet.Spec.Template,
558609 Namespace: spindleSet.Namespace,
559610 OperatorAddr: r.OperatorAddr,
+83-19
internal/jobbuilder/job_template.go
···5858 // If empty, no secrets are injected
5959 SecretName string
60606161+ // ConfigMapName is the name of the ConfigMap containing the workflow spec JSON.
6262+ // When set, the spec is mounted as a file instead of passed as an env var,
6363+ // keeping the Job object small in etcd.
6464+ ConfigMapName string
6565+6166 // SecretKeys is the list of environment variable names that contain secrets.
6267 // These are passed to the runner for log masking.
6368 SecretKeys []string
···143148 return nil, fmt.Errorf("spindleset name is required")
144149 }
145150146146- // Marshal workflow spec to JSON for the runner binary
147147- workflowSpecJSON, err := json.Marshal(config.WorkflowSpec)
148148- if err != nil {
149149- return nil, fmt.Errorf("failed to marshal workflow spec: %w", err)
151151+ // Marshal workflow spec to JSON for the runner binary (only needed when not using ConfigMap)
152152+ var workflowSpecJSON []byte
153153+ if config.ConfigMapName == "" {
154154+ var err error
155155+ workflowSpecJSON, err = json.Marshal(config.WorkflowSpec)
156156+ if err != nil {
157157+ return nil, fmt.Errorf("failed to marshal workflow spec: %w", err)
158158+ }
150159 }
151160152161 // Select resource profile based on workflow architecture and available nodes
···251260252261 VolumeMounts: buildRunnerVolumeMounts(config),
253262254254- Env: append(buildEnvironmentVariables(config),
255255- corev1.EnvVar{
256256- Name: "LOOM_WORKFLOW_SPEC",
257257- Value: string(workflowSpecJSON),
258258- },
259259- corev1.EnvVar{
260260- Name: "LOOM_SECRET_KEYS",
261261- Value: strings.Join(config.SecretKeys, ","),
262262- },
263263- corev1.EnvVar{
264264- Name: "LOOM_OPERATOR_ADDR",
265265- Value: config.OperatorAddr,
266266- },
267267- ),
263263+ Env: buildContainerEnv(config, workflowSpecJSON),
268264269265 // Inject repository secrets via envFrom if available
270266 EnvFrom: buildEnvFromSources(config),
···280276 Affinity: finalAffinity,
281277282278 // Use dedicated service account with minimal permissions
283283- // Note: imagePullSecrets should be attached to this SA, not the controller SA
284279 ServiceAccountName: "loom-spindle-job-runner",
280280+281281+ // Specify imagePullSecrets directly on the pod spec to avoid
282282+ // a kubelet race where SA-attached secrets aren't resolved
283283+ // in time for the first image pull attempt
284284+ ImagePullSecrets: buildImagePullSecrets(config),
285285 },
286286 },
287287 },
···290290 return job, nil
291291}
292292293293+// buildImagePullSecrets converts template secret names to LocalObjectReference list.
294294+func buildImagePullSecrets(config WorkflowConfig) []corev1.LocalObjectReference {
295295+ var refs []corev1.LocalObjectReference
296296+ for _, name := range config.Template.ImagePullSecrets {
297297+ refs = append(refs, corev1.LocalObjectReference{Name: name})
298298+ }
299299+ return refs
300300+}
301301+293302// buildEnvironmentVariables creates the environment variables for the runner container.
294303// All environment variables come from WorkflowSpec.Environment, which includes:
295304// - Engine-specific vars (PATH, TANGLED_ARCHITECTURE, HOME) set in InitWorkflow
···305314 return env
306315}
307316317317+// buildContainerEnv builds the environment variables for the runner container.
318318+// When a ConfigMap is used, the workflow spec is referenced via LOOM_WORKFLOW_SPEC_PATH
319319+// instead of embedding the full JSON in LOOM_WORKFLOW_SPEC.
320320+func buildContainerEnv(config WorkflowConfig, workflowSpecJSON []byte) []corev1.EnvVar {
321321+ env := buildEnvironmentVariables(config)
322322+323323+ if config.ConfigMapName != "" {
324324+ env = append(env, corev1.EnvVar{
325325+ Name: "LOOM_WORKFLOW_SPEC_PATH",
326326+ Value: "/runner-config/workflow-spec.json",
327327+ })
328328+ } else {
329329+ env = append(env, corev1.EnvVar{
330330+ Name: "LOOM_WORKFLOW_SPEC",
331331+ Value: string(workflowSpecJSON),
332332+ })
333333+ }
334334+335335+ env = append(env,
336336+ corev1.EnvVar{
337337+ Name: "LOOM_SECRET_KEYS",
338338+ Value: strings.Join(config.SecretKeys, ","),
339339+ },
340340+ corev1.EnvVar{
341341+ Name: "LOOM_OPERATOR_ADDR",
342342+ Value: config.OperatorAddr,
343343+ },
344344+ )
345345+346346+ return env
347347+}
348348+308349// buildEnvFromSources creates EnvFromSource entries for secrets injection.
309350func buildEnvFromSources(config WorkflowConfig) []corev1.EnvFromSource {
310351 var envFrom []corev1.EnvFromSource
···524565 },
525566 }
526567568568+ // Mount workflow spec ConfigMap if specified
569569+ if config.ConfigMapName != "" {
570570+ mounts = append(mounts, corev1.VolumeMount{
571571+ Name: "workflow-spec",
572572+ MountPath: "/runner-config",
573573+ ReadOnly: true,
574574+ })
575575+ }
576576+527577 // Mount registry credentials if specified
528578 if config.Template.RegistryCredentialsSecret != "" {
529579 mounts = append(mounts, corev1.VolumeMount{
···581631 EmptyDir: &corev1.EmptyDirVolumeSource{},
582632 },
583633 },
634634+ }
635635+636636+ // Add workflow spec ConfigMap volume if specified
637637+ if config.ConfigMapName != "" {
638638+ volumes = append(volumes, corev1.Volume{
639639+ Name: "workflow-spec",
640640+ VolumeSource: corev1.VolumeSource{
641641+ ConfigMap: &corev1.ConfigMapVolumeSource{
642642+ LocalObjectReference: corev1.LocalObjectReference{
643643+ Name: config.ConfigMapName,
644644+ },
645645+ },
646646+ },
647647+ })
584648 }
585649586650 // Add registry credentials volume if specified
+118
internal/jobbuilder/job_template_test.go
···326326 }
327327}
328328329329+func TestBuildJobConfigMapVolume(t *testing.T) {
330330+ config := WorkflowConfig{
331331+ WorkflowName: "test-workflow",
332332+ PipelineID: "test-pipeline",
333333+ SpindleSetName: "test-spindleset",
334334+ Image: "test:latest",
335335+ Architecture: "amd64",
336336+ WorkflowSpec: loomv1alpha1.WorkflowSpec{Name: "test"},
337337+ Namespace: "default",
338338+ ConfigMapName: "spindle-test-pipeline-test-workflow-spec",
339339+ }
340340+ nodes := makeNodeList(map[string]string{"kubernetes.io/arch": "amd64"})
341341+342342+ job, err := BuildJob(config, nodes)
343343+ if err != nil {
344344+ t.Fatalf("BuildJob() error = %v", err)
345345+ }
346346+347347+ container := job.Spec.Template.Spec.Containers[0]
348348+349349+ // Should have LOOM_WORKFLOW_SPEC_PATH, not LOOM_WORKFLOW_SPEC
350350+ var hasSpecPath, hasSpecInline bool
351351+ for _, env := range container.Env {
352352+ if env.Name == "LOOM_WORKFLOW_SPEC_PATH" {
353353+ hasSpecPath = true
354354+ if env.Value != "/runner-config/workflow-spec.json" {
355355+ t.Errorf("LOOM_WORKFLOW_SPEC_PATH = %q, want /runner-config/workflow-spec.json", env.Value)
356356+ }
357357+ }
358358+ if env.Name == "LOOM_WORKFLOW_SPEC" {
359359+ hasSpecInline = true
360360+ }
361361+ }
362362+ if !hasSpecPath {
363363+ t.Error("expected LOOM_WORKFLOW_SPEC_PATH env var when ConfigMapName is set")
364364+ }
365365+ if hasSpecInline {
366366+ t.Error("LOOM_WORKFLOW_SPEC env var should not be set when ConfigMapName is set")
367367+ }
368368+369369+ // Should have workflow-spec volume
370370+ var hasVolume bool
371371+ for _, v := range job.Spec.Template.Spec.Volumes {
372372+ if v.Name == "workflow-spec" {
373373+ hasVolume = true
374374+ if v.ConfigMap == nil {
375375+ t.Error("workflow-spec volume should be a ConfigMap volume")
376376+ } else if v.ConfigMap.Name != config.ConfigMapName {
377377+ t.Errorf("ConfigMap name = %q, want %q", v.ConfigMap.Name, config.ConfigMapName)
378378+ }
379379+ }
380380+ }
381381+ if !hasVolume {
382382+ t.Error("expected workflow-spec volume when ConfigMapName is set")
383383+ }
384384+385385+ // Should have volume mount at /runner-config
386386+ var hasMount bool
387387+ for _, m := range container.VolumeMounts {
388388+ if m.Name == "workflow-spec" {
389389+ hasMount = true
390390+ if m.MountPath != "/runner-config" {
391391+ t.Errorf("mount path = %q, want /runner-config", m.MountPath)
392392+ }
393393+ if !m.ReadOnly {
394394+ t.Error("workflow-spec mount should be read-only")
395395+ }
396396+ }
397397+ }
398398+ if !hasMount {
399399+ t.Error("expected workflow-spec volume mount when ConfigMapName is set")
400400+ }
401401+}
402402+403403+func TestBuildJobWithoutConfigMap(t *testing.T) {
404404+ config := WorkflowConfig{
405405+ WorkflowName: "test-workflow",
406406+ PipelineID: "test-pipeline",
407407+ SpindleSetName: "test-spindleset",
408408+ Image: "test:latest",
409409+ Architecture: "amd64",
410410+ WorkflowSpec: loomv1alpha1.WorkflowSpec{Name: "test"},
411411+ Namespace: "default",
412412+ }
413413+ nodes := makeNodeList(map[string]string{"kubernetes.io/arch": "amd64"})
414414+415415+ job, err := BuildJob(config, nodes)
416416+ if err != nil {
417417+ t.Fatalf("BuildJob() error = %v", err)
418418+ }
419419+420420+ container := job.Spec.Template.Spec.Containers[0]
421421+422422+ // Should have LOOM_WORKFLOW_SPEC, not LOOM_WORKFLOW_SPEC_PATH
423423+ var hasSpecInline, hasSpecPath bool
424424+ for _, env := range container.Env {
425425+ if env.Name == "LOOM_WORKFLOW_SPEC" {
426426+ hasSpecInline = true
427427+ }
428428+ if env.Name == "LOOM_WORKFLOW_SPEC_PATH" {
429429+ hasSpecPath = true
430430+ }
431431+ }
432432+ if !hasSpecInline {
433433+ t.Error("expected LOOM_WORKFLOW_SPEC env var when ConfigMapName is empty")
434434+ }
435435+ if hasSpecPath {
436436+ t.Error("LOOM_WORKFLOW_SPEC_PATH should not be set when ConfigMapName is empty")
437437+ }
438438+439439+ // Should NOT have workflow-spec volume
440440+ for _, v := range job.Spec.Template.Spec.Volumes {
441441+ if v.Name == "workflow-spec" {
442442+ t.Error("workflow-spec volume should not exist when ConfigMapName is empty")
443443+ }
444444+ }
445445+}
446446+329447func TestBuildJob(t *testing.T) {
330448 tests := []struct {
331449 name string