Kubernetes Operator for Tangled Spindles
15
fork

Configure Feed

Select the types of activity you want to include in your feed.

matrix expansion and grpc for workflows

+2718 -504
+23
.tangled/workflows/release-helm.yaml
··· 1 + when: 2 + - event: ["push"] 3 + tag: ["v*"] 4 + 5 + engine: kubernetes 6 + image: alpine/helm:latest 7 + architecture: amd64 8 + 9 + environment: 10 + IMAGE_REGISTRY: buoy.cr 11 + 12 + steps: 13 + - name: Login to registry 14 + command: | 15 + echo "${APP_PASSWORD}" | helm registry login \ 16 + -u "${TANGLED_REPO_DID}" \ 17 + --password-stdin \ 18 + ${IMAGE_REGISTRY} 19 + 20 + - name: Package and push Helm chart 21 + command: | 22 + helm package helm/loom --version ${TANGLED_REF_NAME#v} --app-version ${TANGLED_REF_NAME#v} 23 + helm push loom-${TANGLED_REF_NAME#v}.tgz oci://${IMAGE_REGISTRY}/${TANGLED_REPO_DID}/charts
+2 -1
.tangled/workflows/release.yaml
··· 10 10 architecture: amd64 11 11 12 12 environment: 13 - IMAGE_REGISTRY: atcr.io 13 + IMAGE_REGISTRY: buoy.cr 14 14 15 15 steps: 16 16 - name: Login to registry ··· 30 30 31 31 buildah push ${IMAGE_REGISTRY}/${TANGLED_REPO_DID}/${TANGLED_REPO_NAME}:latest 32 32 buildah push ${IMAGE_REGISTRY}/${TANGLED_REPO_DID}/${TANGLED_REPO_NAME}:${TANGLED_REF_NAME} 33 +
-25
.tangled/workflows/workflow-amd64.yaml
··· 1 - when: 2 - - event: ["push"] 3 - tag: ["v*"] 4 - branch : ["*"] 5 - 6 - engine: kubernetes 7 - image: golang:1.25-trixie 8 - architecture: amd64 9 - 10 - environment: 11 - IMAGE_REGISTRY: atcr.io 12 - 13 - steps: 14 - - name: test environment vars 15 - command: | 16 - printenv 17 - 18 - - name: Login to registry 19 - command: | 20 - echo "${APP_PASSWORD}" | buildah login \ 21 - -u "${TANGLED_REPO_DID}" \ 22 - --password-stdin \ 23 - ${IMAGE_REGISTRY} 24 - 25 -
-21
.tangled/workflows/workflow-arm64.yaml
··· 1 - when: 2 - - event: ["push"] 3 - tag: ["v*"] 4 - branch : ["*"] 5 - 6 - engine: kubernetes 7 - image: golang:1.25-trixie 8 - architecture: arm64 9 - 10 - steps: 11 - - name: build manager binary 12 - command: | 13 - make build 14 - 15 - - name: verify build artifacts 16 - command: | 17 - ls -lh bin/ 18 - 19 - - name: hello 20 - command: | 21 - echo "hello"
+2 -2
Dockerfile
··· 1 1 # Build both binaries 2 2 # Use BUILDPLATFORM so Go runs natively, cross-compile for target arch 3 - FROM --platform=$BUILDPLATFORM golang:1.25 AS builder 3 + FROM --platform=$BUILDPLATFORM golang:1.25-trixie AS builder 4 4 5 5 ARG TARGETOS 6 6 ARG TARGETARCH ··· 40 40 go build -a -ldflags='-s -w' -o manager ./cmd/controller 41 41 42 42 # Unified image with both binaries 43 - FROM gcr.io/distroless/base-debian12:nonroot 43 + FROM gcr.io/distroless/base-debian13:nonroot 44 44 COPY --from=builder /workspace/loom/manager /manager 45 45 COPY --from=builder /workspace/loom/loom-runner /loom-runner 46 46
+10 -23
Makefile
··· 3 3 # To re-generate a bundle for another specific version without changing the standard setup, you can: 4 4 # - use the VERSION as arg of the bundle target (e.g make bundle VERSION=0.0.2) 5 5 # - use environment variables to overwrite this value (e.g export VERSION=0.0.2) 6 - VERSION ?= 0.0.1 6 + VERSION ?= 0.1.5 7 7 8 8 # CHANNELS define the bundle channels used in the bundle. 9 9 # Add a new line here if you would like to change its default config. (E.g CHANNELS = "candidate,fast,stable") ··· 50 50 # This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit. 51 51 OPERATOR_SDK_VERSION ?= v1.41.1 52 52 # Image URL to use all building/pushing image targets 53 - IMG ?= atcr.io/evan.jarrett.net/loom:latest 53 + IMG ?= buoy.cr/evan.jarrett.net/loom:latest 54 54 55 55 # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) 56 56 ifeq (,$(shell go env GOBIN)) ··· 99 99 .PHONY: generate 100 100 generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. 101 101 $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..." 102 + 103 + .PHONY: proto 104 + proto: ## Generate protobuf and gRPC code. 105 + buf generate 102 106 103 107 .PHONY: fmt 104 108 fmt: ## Run go fmt against code. ··· 211 215 212 216 .PHONY: test-registry-auth 213 217 test-registry-auth: ## Test registry authentication before building 214 - @echo "Testing registry authentication for atcr.io..." 218 + @echo "Testing registry authentication for buoy.cr..." 215 219 @if [ -f /usr/local/sbin/docker-credential-atcr ]; then \ 216 220 echo "Testing credential helper..."; \ 217 - echo "atcr.io" | docker-credential-atcr get && echo "✓ Credential helper working!" || echo "✗ Credential helper failed"; \ 221 + echo "buoy.cr" | docker-credential-atcr get && echo "✓ Credential helper working!" 
|| echo "✗ Credential helper failed"; \ 218 222 else \ 219 223 echo "⚠ Credential helper not found at /usr/local/sbin/docker-credential-atcr"; \ 220 224 fi ··· 222 226 @echo "Testing Docker config..." 223 227 @if [ -f $(HOME)/.docker/config.json ]; then \ 224 228 echo "✓ Docker config exists at $(HOME)/.docker/config.json"; \ 225 - cat $(HOME)/.docker/config.json | grep -q "atcr.io" && echo "✓ atcr.io found in config" || echo "⚠ atcr.io not found in config"; \ 229 + cat $(HOME)/.docker/config.json | grep -q "buoy.cr" && echo "✓ buoy.cr found in config" || echo "⚠ buoy.cr not found in config"; \ 226 230 else \ 227 231 echo "✗ Docker config not found"; \ 228 232 fi 229 233 @echo "" 230 234 @echo "Testing registry access with docker pull (this will fail if auth is broken)..." 231 - @$(CONTAINER_TOOL) pull atcr.io/evan.jarrett.net/loom-runner:latest 2>/dev/null && echo "✓ Can pull from registry!" || echo "⚠ Cannot pull from registry (may not exist yet)" 232 - 233 - # PLATFORMS defines the target platforms for the manager image be built to provide support to multiple 234 - # architectures. (i.e. make docker-buildx IMG=myregistry/mypoperator:0.0.1). To use this option you need to: 235 - # - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/ 236 - # - have enabled BuildKit. More info: https://docs.docker.com/develop/develop-images/build_enhancements/ 237 - # - be able to push the image to your registry (i.e. if you do not set a valid value via IMG=<myregistry/image:<tag>> then the export will fail) 238 - # To adequately provide solutions that are compatible with multiple platforms, you should consider using this option. 
239 - PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le 240 - .PHONY: docker-buildx 241 - docker-buildx: ## Build and push docker image for the manager for cross-platform support 242 - # copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile 243 - sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross 244 - - $(CONTAINER_TOOL) buildx create --name loom-builder 245 - $(CONTAINER_TOOL) buildx use loom-builder 246 - - cd .. && $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMG} -f loom/Dockerfile.cross . 247 - - $(CONTAINER_TOOL) buildx rm loom-builder 248 - rm Dockerfile.cross 235 + @$(CONTAINER_TOOL) pull buoy.cr/evan.jarrett.net/loom-runner:latest 2>/dev/null && echo "✓ Can pull from registry!" || echo "⚠ Cannot pull from registry (may not exist yet)" 249 236 250 237 .PHONY: build-installer 251 238 build-installer: manifests generate kustomize ## Generate a consolidated YAML with CRDs and deployment.
+69
api/v1alpha1/spindleset_types.go
··· 61 61 Secrets []SecretData `json:"secrets,omitempty"` 62 62 63 63 // Workflows is the list of workflows to execute in this pipeline. 64 + // For multi-arch workflows, this contains one entry per matrix leg plus an optional final entry. 64 65 // +kubebuilder:validation:MinItems=1 65 66 Workflows []WorkflowSpec `json:"workflows"` 67 + 68 + // MultiArch indicates this pipeline run contains multi-arch workflows. 69 + // When true, the controller creates per-architecture Jobs and gates the final Job. 70 + // +optional 71 + MultiArch bool `json:"multiArch,omitempty"` 66 72 } 67 73 68 74 // SecretData represents a single secret key-value pair for injection into Jobs. ··· 81 87 82 88 // WorkflowSpec defines a workflow to execute as part of a pipeline. 83 89 // This is the canonical workflow definition that matches the .tangled/workflows/*.yaml format. 90 + // For multi-arch workflows, the engine expands the matrix and creates one WorkflowSpec per leg. 91 + // Each leg has a single Image and Architecture; the matrix metadata lives in PipelineRunSpec. 84 92 type WorkflowSpec struct { 85 93 // Name is the workflow filename (e.g., "workflow-amd64.yaml"). 86 94 // +kubebuilder:validation:Required ··· 110 118 // Dependencies specifies external dependencies for the workflow. 111 119 // +optional 112 120 Dependencies *WorkflowDependencies `json:"dependencies,omitempty"` 121 + 122 + // Final defines steps that run once after all matrix legs complete. 123 + // Only valid on multi-arch workflows. The engine sets this on the dedicated final WorkflowSpec. 124 + // +optional 125 + Final *FinalSpec `json:"final,omitempty"` 126 + 127 + // IsMatrixLeg indicates this WorkflowSpec was generated from a matrix expansion. 128 + // +optional 129 + IsMatrixLeg bool `json:"isMatrixLeg,omitempty"` 130 + 131 + // IsFinal indicates this WorkflowSpec represents the final step of a multi-arch workflow. 
132 + // +optional 133 + IsFinal bool `json:"isFinal,omitempty"` 134 + } 135 + 136 + // FinalSpec defines steps that run once after all matrix legs complete successfully. 137 + type FinalSpec struct { 138 + // Architecture is the target architecture for the final steps. 139 + // +kubebuilder:validation:Required 140 + // +kubebuilder:validation:Enum=amd64;arm64 141 + Architecture string `json:"architecture"` 142 + 143 + // Image is the container image for the final steps. 144 + // If empty, uses the first image from the matrix. 145 + // +optional 146 + Image string `json:"image,omitempty"` 147 + 148 + // Steps is the ordered list of steps to execute after all matrix legs complete. 149 + // +kubebuilder:validation:MinItems=1 150 + Steps []WorkflowStep `json:"steps"` 113 151 } 114 152 115 153 // WorkflowStep defines a single step in a workflow. ··· 224 262 Name string `json:"name"` 225 263 226 264 // JobName is the name of the Kubernetes Job created for this workflow. 265 + // For multi-arch workflows, this is empty; use MatrixLegStatuses instead. 227 266 // +optional 228 267 JobName string `json:"jobName,omitempty"` 229 268 ··· 238 277 // CompletionTime is when the workflow finished. 239 278 // +optional 240 279 CompletionTime *metav1.Time `json:"completionTime,omitempty"` 280 + 281 + // MatrixLegStatuses tracks per-architecture Job statuses for multi-arch workflows. 282 + // +optional 283 + MatrixLegStatuses []MatrixLegStatus `json:"matrixLegStatuses,omitempty"` 284 + 285 + // FinalJobName is the name of the final Job (for multi-arch workflows). 286 + // +optional 287 + FinalJobName string `json:"finalJobName,omitempty"` 288 + 289 + // FinalPhase is the phase of the final Job. 290 + // +optional 291 + FinalPhase string `json:"finalPhase,omitempty"` 292 + } 293 + 294 + // MatrixLegStatus tracks the status of a single matrix leg Job. 295 + type MatrixLegStatus struct { 296 + // Architecture is the target architecture for this leg. 
297 + Architecture string `json:"architecture"` 298 + 299 + // Image is the container image used for this leg. 300 + // +optional 301 + Image string `json:"image,omitempty"` 302 + 303 + // JobName is the name of the Kubernetes Job for this leg. 304 + // +optional 305 + JobName string `json:"jobName,omitempty"` 306 + 307 + // Phase is the current phase (Pending, Running, Succeeded, Failed). 308 + // +optional 309 + Phase string `json:"phase,omitempty"` 241 310 } 242 311 243 312 // +kubebuilder:object:root=true
+47
api/v1alpha1/zz_generated.deepcopy.go
··· 27 27 ) 28 28 29 29 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 30 + func (in *FinalSpec) DeepCopyInto(out *FinalSpec) { 31 + *out = *in 32 + if in.Steps != nil { 33 + in, out := &in.Steps, &out.Steps 34 + *out = make([]WorkflowStep, len(*in)) 35 + for i := range *in { 36 + (*in)[i].DeepCopyInto(&(*out)[i]) 37 + } 38 + } 39 + } 40 + 41 + // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FinalSpec. 42 + func (in *FinalSpec) DeepCopy() *FinalSpec { 43 + if in == nil { 44 + return nil 45 + } 46 + out := new(FinalSpec) 47 + in.DeepCopyInto(out) 48 + return out 49 + } 50 + 51 + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 52 + func (in *MatrixLegStatus) DeepCopyInto(out *MatrixLegStatus) { 53 + *out = *in 54 + } 55 + 56 + // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MatrixLegStatus. 57 + func (in *MatrixLegStatus) DeepCopy() *MatrixLegStatus { 58 + if in == nil { 59 + return nil 60 + } 61 + out := new(MatrixLegStatus) 62 + in.DeepCopyInto(out) 63 + return out 64 + } 65 + 66 + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 30 67 func (in *PipelineRunSpec) DeepCopyInto(out *PipelineRunSpec) { 31 68 *out = *in 32 69 if in.CloneCommands != nil { ··· 288 325 *out = new(WorkflowDependencies) 289 326 (*in).DeepCopyInto(*out) 290 327 } 328 + if in.Final != nil { 329 + in, out := &in.Final, &out.Final 330 + *out = new(FinalSpec) 331 + (*in).DeepCopyInto(*out) 332 + } 291 333 } 292 334 293 335 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkflowSpec. 
··· 310 352 if in.CompletionTime != nil { 311 353 in, out := &in.CompletionTime, &out.CompletionTime 312 354 *out = (*in).DeepCopy() 355 + } 356 + if in.MatrixLegStatuses != nil { 357 + in, out := &in.MatrixLegStatuses, &out.MatrixLegStatuses 358 + *out = make([]MatrixLegStatus, len(*in)) 359 + copy(*out, *in) 313 360 } 314 361 } 315 362
+10
buf.gen.yaml
··· 1 + version: v2 2 + plugins: 3 + - local: protoc-gen-go 4 + out: internal/pb 5 + opt: 6 + - paths=source_relative 7 + - local: protoc-gen-go-grpc 8 + out: internal/pb 9 + opt: 10 + - paths=source_relative
+9
buf.yaml
··· 1 + version: v2 2 + modules: 3 + - path: proto 4 + lint: 5 + use: 6 + - STANDARD 7 + breaking: 8 + use: 9 + - FILE
+83 -10
cmd/controller/main.go
··· 22 22 _ "embed" 23 23 "flag" 24 24 "fmt" 25 + "net" 25 26 "os" 26 27 "path/filepath" 27 28 ··· 37 38 clientgoscheme "k8s.io/client-go/kubernetes/scheme" 38 39 ctrl "sigs.k8s.io/controller-runtime" 39 40 "sigs.k8s.io/controller-runtime/pkg/certwatcher" 41 + "sigs.k8s.io/controller-runtime/pkg/client" 40 42 "sigs.k8s.io/controller-runtime/pkg/healthz" 41 43 "sigs.k8s.io/controller-runtime/pkg/log/zap" 42 44 "sigs.k8s.io/controller-runtime/pkg/metrics/filters" ··· 50 52 loomv1alpha1 "tangled.org/evan.jarrett.net/loom/api/v1alpha1" 51 53 "tangled.org/evan.jarrett.net/loom/internal/controller" 52 54 "tangled.org/evan.jarrett.net/loom/internal/engine" 55 + loomgrpc "tangled.org/evan.jarrett.net/loom/internal/grpc" 53 56 // +kubebuilder:scaffold:imports 54 57 ) 55 58 ··· 177 180 178 181 // initializeSpindle creates a spindle server with KubernetesEngine 179 182 func initializeSpindle( 180 - ctx context.Context, cfg *config.Config, mgr ctrl.Manager, loomCfg *LoomConfig, 183 + ctx context.Context, cfg *config.Config, mgr ctrl.Manager, loomCfg *LoomConfig, hub *loomgrpc.Hub, artifacts *loomgrpc.ArtifactStore, 181 184 ) (*spindle.Spindle, error) { 182 185 // Initialize Kubernetes engine 183 186 // Get namespace from environment (injected via Downward API) ··· 203 206 return nil, fmt.Errorf("failed to create spindle: %w", err) 204 207 } 205 208 206 - // Now create kubernetes engine with access to vault 207 - kubeEngine := engine.NewKubernetesEngine(mgr.GetClient(), mgr.GetConfig(), namespace, template, s.Vault()) 209 + // Now create kubernetes engine with access to vault and gRPC hub 210 + kubeEngine := engine.NewKubernetesEngine(mgr.GetClient(), mgr.GetConfig(), namespace, template, s.Vault(), hub, artifacts) 208 211 209 212 // Register the engine with spindle by adding to the engines map 210 213 s.Engines()["kubernetes"] = kubeEngine ··· 385 388 os.Exit(1) 386 389 } 387 390 391 + // Create gRPC hub for runner communication 392 + hub := loomgrpc.NewHub() 393 + 394 + // 
Create artifact store for pipeline artifacts (scratch directory) 395 + artifactDir := "/scratch/artifacts" 396 + if dir := os.Getenv("LOOM_ARTIFACT_DIR"); dir != "" { 397 + artifactDir = dir 398 + } 399 + artifactStore, err := loomgrpc.NewArtifactStore(artifactDir) 400 + if err != nil { 401 + setupLog.Error(err, "failed to create artifact store") 402 + os.Exit(1) 403 + } 404 + 388 405 // Initialize spindle server with KubernetesEngine 389 - s, err := initializeSpindle(ctx, spindleCfg, mgr, loomCfg) 406 + s, err := initializeSpindle(ctx, spindleCfg, mgr, loomCfg, hub, artifactStore) 390 407 if err != nil { 391 408 setupLog.Error(err, "failed to initialize spindle") 392 409 os.Exit(1) ··· 396 413 397 414 setupLog.Info("spindle server initialized successfully") 398 415 416 + // Start gRPC server for runner communication 417 + grpcAddr := ":9090" 418 + if addr := os.Getenv("LOOM_GRPC_ADDR"); addr != "" { 419 + grpcAddr = addr 420 + } 421 + 422 + grpcServer := loomgrpc.NewServer(hub, artifactStore) 423 + go func() { 424 + lis, err := net.Listen("tcp", grpcAddr) 425 + if err != nil { 426 + setupLog.Error(err, "failed to listen for gRPC", "address", grpcAddr) 427 + os.Exit(1) 428 + } 429 + setupLog.Info("starting gRPC server", "address", grpcAddr) 430 + if err := grpcServer.Serve(lis); err != nil { 431 + setupLog.Error(err, "gRPC server error") 432 + } 433 + }() 434 + defer grpcServer.GracefulStop() 435 + 399 436 // Start spindle HTTP server in background 400 437 go func() { 401 438 setupLog.Info("starting spindle HTTP server", "address", spindleCfg.Server.ListenAddr) ··· 407 444 // Get loom image from environment (used for runner init container) 408 445 loomImage := os.Getenv("LOOM_IMAGE") 409 446 if loomImage == "" { 410 - loomImage = "atcr.io/evan.jarrett.net/loom:latest" // default fallback 447 + loomImage = "buoy.cr/evan.jarrett.net/loom:latest" // default fallback 448 + } 449 + 450 + // Discover the gRPC service address that runner pods will use to reach the 
operator. 451 + podNamespace := os.Getenv("POD_NAMESPACE") 452 + if podNamespace == "" { 453 + podNamespace = "default" 454 + } 455 + operatorAddr := os.Getenv("LOOM_OPERATOR_ADDR") 456 + if operatorAddr == "" { 457 + // Find the gRPC service by label in our namespace 458 + var services corev1.ServiceList 459 + if err := mgr.GetAPIReader().List(context.Background(), &services, 460 + client.InNamespace(podNamespace), 461 + client.MatchingLabels{ 462 + "app.kubernetes.io/name": "loom", 463 + "app.kubernetes.io/component": "grpc", 464 + }, 465 + ); err != nil { 466 + setupLog.Error(err, "failed to discover gRPC service") 467 + os.Exit(1) 468 + } 469 + if len(services.Items) == 0 { 470 + setupLog.Error(nil, "no gRPC service found with label app.kubernetes.io/component=grpc") 471 + os.Exit(1) 472 + } 473 + svc := services.Items[0] 474 + grpcPort := int32(9090) 475 + for _, p := range svc.Spec.Ports { 476 + if p.Name == "grpc" { 477 + grpcPort = p.Port 478 + break 479 + } 480 + } 481 + operatorAddr = fmt.Sprintf("%s.%s.svc.cluster.local:%d", svc.Name, podNamespace, grpcPort) 482 + setupLog.Info("discovered gRPC service", "address", operatorAddr) 411 483 } 412 484 413 485 // Setup controller with spindle components 414 486 if err := (&controller.SpindleSetReconciler{ 415 - Client: mgr.GetClient(), 416 - Scheme: mgr.GetScheme(), 417 - Config: mgr.GetConfig(), 418 - Spindle: s, 419 - LoomImage: loomImage, 487 + Client: mgr.GetClient(), 488 + Scheme: mgr.GetScheme(), 489 + Config: mgr.GetConfig(), 490 + Spindle: s, 491 + LoomImage: loomImage, 492 + OperatorAddr: operatorAddr, 420 493 }).SetupWithManager(mgr); err != nil { 421 494 setupLog.Error(err, "unable to create controller", "controller", "SpindleSet") 422 495 os.Exit(1)
+254 -45
cmd/runner/main.go
··· 8 8 "io" 9 9 "os" 10 10 "os/exec" 11 + "path/filepath" 12 + 13 + "google.golang.org/grpc" 14 + "google.golang.org/grpc/credentials/insecure" 11 15 12 16 "tangled.org/core/spindle/models" 13 17 loomv1alpha1 "tangled.org/evan.jarrett.net/loom/api/v1alpha1" 18 + pb "tangled.org/evan.jarrett.net/loom/internal/pb/loom/v1" 14 19 ) 15 20 16 21 // simpleStep implements the models.Step interface ··· 19 24 command string 20 25 } 21 26 22 - // extendedLogLine extends models.LogLine with exit code for error reporting 23 - type extendedLogLine struct { 24 - models.LogLine 25 - ExitCode int `json:"exit_code,omitempty"` 27 + func (s *simpleStep) Name() string { return s.name } 28 + func (s *simpleStep) Command() string { return s.command } 29 + func (s *simpleStep) Kind() models.StepKind { 30 + return models.StepKindUser 26 31 } 27 32 28 - func (s *simpleStep) Name() string { 29 - return s.name 33 + // grpcEmitter sends events to the operator over gRPC and also writes to stdout. 34 + type grpcEmitter struct { 35 + stream grpc.BidiStreamingClient[pb.ConnectRequest, pb.ConnectResponse] 30 36 } 31 37 32 - func (s *simpleStep) Command() string { 33 - return s.command 38 + func (e *grpcEmitter) sendStepControl(stepID int, status string, exitCode int) { 39 + if e.stream != nil { 40 + _ = e.stream.Send(&pb.ConnectRequest{ 41 + Event: &pb.ConnectRequest_StepControl{ 42 + StepControl: &pb.StepControl{ 43 + StepId: int32(stepID), 44 + Status: status, 45 + ExitCode: int32(exitCode), 46 + }, 47 + }, 48 + }) 49 + } 34 50 } 35 51 36 - func (s *simpleStep) Kind() models.StepKind { 37 - return models.StepKindUser 52 + func (e *grpcEmitter) sendLogLine(stepID int, streamName, content string) { 53 + if e.stream != nil { 54 + _ = e.stream.Send(&pb.ConnectRequest{ 55 + Event: &pb.ConnectRequest_LogLine{ 56 + LogLine: &pb.LogLine{ 57 + StepId: int32(stepID), 58 + Stream: streamName, 59 + Content: content, 60 + }, 61 + }, 62 + }) 63 + } 38 64 } 39 65 40 66 func main() { ··· 76 102 return 
fmt.Errorf("failed to copy: %w", err) 77 103 } 78 104 79 - // Make executable 80 105 if err := os.Chmod(dst, 0755); err != nil { 81 106 return fmt.Errorf("failed to chmod: %w", err) 82 107 } ··· 96 121 return fmt.Errorf("failed to parse workflow spec: %w", err) 97 122 } 98 123 124 + // Connect to operator via gRPC 125 + emitter, cleanup, err := connectToOperator(workflow) 126 + if err != nil { 127 + // gRPC connection failure is fatal — the operator won't see our events 128 + return fmt.Errorf("failed to connect to operator: %w", err) 129 + } 130 + defer cleanup() 131 + 99 132 // Set up environment variables 100 133 if workflow.Environment != nil { 101 134 for k, v := range workflow.Environment { ··· 105 138 } 106 139 } 107 140 141 + // Create artifacts directory so user steps can write to it (non-root container 142 + // can't create /artifacts at root itself). Safe to create even when unused. 143 + artifactsDir := os.Getenv("LOOM_ARTIFACTS") 144 + if artifactsDir != "" { 145 + if err := os.MkdirAll(artifactsDir, 0755); err != nil { 146 + return fmt.Errorf("failed to create artifacts directory: %w", err) 147 + } 148 + } 149 + 150 + // For final jobs, download artifacts from matrix legs before executing steps 151 + if os.Getenv("LOOM_FINAL") == "true" && artifactsDir != "" { 152 + fmt.Fprintf(os.Stderr, "downloading artifacts from matrix legs...\n") 153 + if err := downloadArtifacts(emitter, artifactsDir); err != nil { 154 + return fmt.Errorf("failed to download artifacts: %w", err) 155 + } 156 + fmt.Fprintf(os.Stderr, "artifacts downloaded to %s\n", artifactsDir) 157 + } 158 + 108 159 // Execute each step 109 160 ctx := context.Background() 110 161 for i, step := range workflow.Steps { 111 - if err := executeStep(ctx, i, step); err != nil { 162 + if err := executeStep(ctx, i, step, emitter); err != nil { 112 163 return fmt.Errorf("step %d (%s) failed: %w", i, step.Name, err) 113 164 } 114 165 } 115 166 167 + // Upload artifacts if this is a matrix leg 168 + if 
os.Getenv("LOOM_MATRIX_LEG") == "true" && artifactsDir != "" { 169 + if err := uploadArtifacts(emitter, artifactsDir); err != nil { 170 + fmt.Fprintf(os.Stderr, "WARNING: failed to upload artifacts: %v\n", err) 171 + } 172 + } 173 + 116 174 return nil 117 175 } 118 176 119 - func executeStep(ctx context.Context, stepID int, step loomv1alpha1.WorkflowStep) error { 120 - // Create a simple step for logging 121 - simpleStep := &simpleStep{ 122 - name: step.Name, 123 - command: step.Command, 177 + // connectToOperator establishes the gRPC connection and sends the identity message. 178 + func connectToOperator(workflow loomv1alpha1.WorkflowSpec) (*grpcEmitter, func(), error) { 179 + addr := os.Getenv("LOOM_OPERATOR_ADDR") 180 + if addr == "" { 181 + return nil, nil, fmt.Errorf("LOOM_OPERATOR_ADDR environment variable not set") 182 + } 183 + 184 + conn, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials())) 185 + if err != nil { 186 + return nil, nil, fmt.Errorf("failed to create gRPC client: %w", err) 187 + } 188 + 189 + client := pb.NewLoomRunnerServiceClient(conn) 190 + stream, err := client.Connect(context.Background()) 191 + if err != nil { 192 + conn.Close() 193 + return nil, nil, fmt.Errorf("failed to open gRPC stream: %w", err) 194 + } 195 + 196 + // Send identity message 197 + pipelineID := os.Getenv("TANGLED_PIPELINE_ID") 198 + if pipelineID == "" { 199 + pipelineID = workflow.Environment["TANGLED_PIPELINE_ID"] 200 + } 201 + 202 + if err := stream.Send(&pb.ConnectRequest{ 203 + PipelineId: pipelineID, 204 + WorkflowName: workflow.Name, 205 + Architecture: workflow.Architecture, 206 + }); err != nil { 207 + conn.Close() 208 + return nil, nil, fmt.Errorf("failed to send identity: %w", err) 209 + } 210 + 211 + emitter := &grpcEmitter{stream: stream} 212 + cleanup := func() { 213 + _ = stream.CloseSend() 214 + conn.Close() 124 215 } 125 216 126 - // Emit step start event 127 - emitControlEvent(stepID, simpleStep, 
models.StepStatusStart) 217 + fmt.Fprintf(os.Stderr, "connected to operator at %s\n", addr) 218 + return emitter, cleanup, nil 219 + } 220 + 221 + func executeStep(ctx context.Context, stepID int, step loomv1alpha1.WorkflowStep, emitter *grpcEmitter) error { 222 + // Emit step start — gRPC + stdout 223 + emitter.sendStepControl(stepID, "start", 0) 224 + emitStdoutControl(stepID, &simpleStep{name: step.Name, command: step.Command}, models.StepStatusStart) 128 225 129 226 // Set step-specific environment variables 130 227 if step.Environment != nil { ··· 136 233 } 137 234 138 235 // Create command that auto-sources LOOM_ENV if it exists 139 - // Users can write "VAR=value" to this file to share env vars between steps 140 236 wrappedCommand := `if [ -f "$LOOM_ENV" ]; then set -a; source "$LOOM_ENV"; set +a; fi; ` + step.Command 141 237 cmd := exec.CommandContext(ctx, "bash", "-c", wrappedCommand) 142 238 cmd.Dir = "/tangled/workspace" 143 239 cmd.Env = append(os.Environ(), "LOOM_ENV=/tangled/workspace/.loom-env") 144 240 145 - // Capture stdout and stderr 146 241 stdout, err := cmd.StdoutPipe() 147 242 if err != nil { 148 243 return fmt.Errorf("failed to create stdout pipe: %w", err) ··· 153 248 return fmt.Errorf("failed to create stderr pipe: %w", err) 154 249 } 155 250 156 - // Start the command 157 251 if err := cmd.Start(); err != nil { 158 - emitControlEvent(stepID, simpleStep, models.StepStatusEnd) 252 + emitter.sendStepControl(stepID, "end", 1) 159 253 return fmt.Errorf("failed to start command: %w", err) 160 254 } 161 255 162 256 // Stream stdout and stderr concurrently 163 257 done := make(chan error, 2) 164 - go streamOutput(stdout, stepID, "stdout", done) 165 - go streamOutput(stderr, stepID, "stderr", done) 258 + go streamOutput(stdout, stepID, "stdout", emitter, done) 259 + go streamOutput(stderr, stepID, "stderr", emitter, done) 166 260 167 - // Wait for both streams to complete 168 261 for i := 0; i < 2; i++ { 169 262 if err := <-done; err != nil { 170 
- // Log error but don't fail - we still want to wait for the command 171 263 fmt.Fprintf(os.Stderr, "WARNING: error streaming output: %v\n", err) 172 264 } 173 265 } ··· 183 275 } 184 276 } 185 277 186 - // Emit step end event with exit code for error reporting 187 - emitControlEventWithCode(stepID, simpleStep, models.StepStatusEnd, exitCode) 278 + // Emit step end — gRPC + stdout 279 + emitter.sendStepControl(stepID, "end", exitCode) 280 + emitStdoutControlWithCode(stepID, &simpleStep{name: step.Name, command: step.Command}, models.StepStatusEnd, exitCode) 188 281 189 282 if exitCode != 0 { 190 283 return fmt.Errorf("command exited with code %d", exitCode) ··· 193 286 return nil 194 287 } 195 288 196 - func streamOutput(reader io.Reader, stepID int, stream string, done chan<- error) { 289 + func streamOutput(reader io.Reader, stepID int, streamName string, emitter *grpcEmitter, done chan<- error) { 197 290 scanner := bufio.NewScanner(reader) 198 - // Increase buffer size for long lines 199 291 buf := make([]byte, 0, 64*1024) 200 292 scanner.Buffer(buf, 1024*1024) 201 293 202 294 for scanner.Scan() { 203 295 line := scanner.Text() 204 - emitDataEvent(stepID, stream, line) 296 + // Send over gRPC (primary channel) 297 + emitter.sendLogLine(stepID, streamName, line) 298 + // Also emit to stdout for kubectl logs 299 + emitStdoutData(stepID, streamName, line) 205 300 } 206 301 207 302 done <- scanner.Err() 208 303 } 209 304 210 - func emitControlEvent(stepID int, step models.Step, status models.StepStatus) { 305 + // Stdout emitters — for kubectl logs debugging. Not consumed by the operator. 
306 + 307 + func emitStdoutControl(stepID int, step models.Step, status models.StepStatus) { 211 308 logLine := models.NewControlLogLine(stepID, step, status) 212 - emitJSON(logLine) 309 + emitStdoutJSON(logLine) 213 310 } 214 311 215 - // emitControlEventWithCode emits a control event with an exit code for error reporting 216 - func emitControlEventWithCode(stepID int, step models.Step, status models.StepStatus, exitCode int) { 312 + func emitStdoutControlWithCode(stepID int, step models.Step, status models.StepStatus, exitCode int) { 217 313 logLine := models.NewControlLogLine(stepID, step, status) 218 - extended := extendedLogLine{ 219 - LogLine: logLine, 220 - ExitCode: exitCode, 314 + type extended struct { 315 + models.LogLine 316 + ExitCode int `json:"exit_code,omitempty"` 221 317 } 222 - data, err := json.Marshal(extended) 318 + data, err := json.Marshal(extended{LogLine: logLine, ExitCode: exitCode}) 223 319 if err != nil { 224 - fmt.Fprintf(os.Stderr, "ERROR: failed to marshal JSON: %v\n", err) 225 320 return 226 321 } 227 322 fmt.Println(string(data)) 228 323 } 229 324 230 - func emitDataEvent(stepID int, stream, content string) { 325 + func emitStdoutData(stepID int, stream, content string) { 231 326 logLine := models.NewDataLogLine(stepID, content, stream) 232 - emitJSON(logLine) 327 + emitStdoutJSON(logLine) 233 328 } 234 329 235 - func emitJSON(logLine models.LogLine) { 330 + func emitStdoutJSON(logLine models.LogLine) { 236 331 data, err := json.Marshal(logLine) 237 332 if err != nil { 238 - fmt.Fprintf(os.Stderr, "ERROR: failed to marshal JSON: %v\n", err) 239 333 return 240 334 } 241 335 fmt.Println(string(data)) 242 336 } 337 + 338 + // uploadArtifacts walks the artifacts directory and streams all files to the operator. 
339 + func uploadArtifacts(emitter *grpcEmitter, dir string) error { 340 + info, err := os.Stat(dir) 341 + if os.IsNotExist(err) { 342 + return nil // No artifacts to upload 343 + } 344 + if err != nil { 345 + return err 346 + } 347 + if !info.IsDir() { 348 + return nil 349 + } 350 + 351 + const chunkSize = 32 * 1024 // 32KB 352 + 353 + return filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { 354 + if err != nil || info.IsDir() { 355 + return err 356 + } 357 + 358 + relPath, err := filepath.Rel(dir, path) 359 + if err != nil { 360 + return err 361 + } 362 + 363 + f, err := os.Open(path) 364 + if err != nil { 365 + return fmt.Errorf("failed to open artifact %s: %w", relPath, err) 366 + } 367 + defer f.Close() 368 + 369 + buf := make([]byte, chunkSize) 370 + for { 371 + n, readErr := f.Read(buf) 372 + isEOF := readErr == io.EOF 373 + 374 + if emitter.stream != nil { 375 + _ = emitter.stream.Send(&pb.ConnectRequest{ 376 + Event: &pb.ConnectRequest_ArtifactChunk{ 377 + ArtifactChunk: &pb.ArtifactChunk{ 378 + Path: relPath, 379 + Data: buf[:n], 380 + Eof: isEOF, 381 + }, 382 + }, 383 + }) 384 + } 385 + 386 + if isEOF { 387 + break 388 + } 389 + if readErr != nil { 390 + return fmt.Errorf("failed to read artifact %s: %w", relPath, readErr) 391 + } 392 + } 393 + 394 + fmt.Fprintf(os.Stderr, "uploaded artifact: %s\n", relPath) 395 + return nil 396 + }) 397 + } 398 + 399 + // downloadArtifacts receives artifact files from the operator into the local artifacts directory. 400 + // Used by final jobs to receive artifacts from matrix legs. 
401 + func downloadArtifacts(emitter *grpcEmitter, dir string) error { 402 + if emitter.stream == nil { 403 + return nil 404 + } 405 + 406 + for { 407 + resp, err := emitter.stream.Recv() 408 + if err == io.EOF { 409 + return nil 410 + } 411 + if err != nil { 412 + return fmt.Errorf("failed to receive artifact: %w", err) 413 + } 414 + 415 + ad, ok := resp.Event.(*pb.ConnectResponse_ArtifactData) 416 + if !ok { 417 + continue 418 + } 419 + 420 + data := ad.ArtifactData 421 + 422 + // Sentinel: empty path with Eof=true signals "all artifacts sent". 423 + if data.Path == "" && data.Eof { 424 + return nil 425 + } 426 + 427 + targetDir := filepath.Join(dir, data.SourceArchitecture) 428 + targetPath := filepath.Join(targetDir, data.Path) 429 + 430 + if err := os.MkdirAll(filepath.Dir(targetPath), 0755); err != nil { 431 + return fmt.Errorf("failed to create artifact directory: %w", err) 432 + } 433 + 434 + f, err := os.OpenFile(targetPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) 435 + if err != nil { 436 + return fmt.Errorf("failed to open artifact file: %w", err) 437 + } 438 + 439 + if len(data.Data) > 0 { 440 + if _, err := f.Write(data.Data); err != nil { 441 + f.Close() 442 + return fmt.Errorf("failed to write artifact: %w", err) 443 + } 444 + } 445 + f.Close() 446 + 447 + if data.Eof { 448 + fmt.Fprintf(os.Stderr, "downloaded artifact: %s/%s\n", data.SourceArchitecture, data.Path) 449 + } 450 + } 451 + }
+102 -4
config/crd/bases/loom.j5t.io_spindlesets.yaml
··· 77 77 items: 78 78 type: string 79 79 type: array 80 + multiArch: 81 + description: |- 82 + MultiArch indicates this pipeline run contains multi-arch workflows. 83 + When true, the controller creates per-architecture Jobs and gates the final Job. 84 + type: boolean 80 85 pipelineID: 81 86 description: PipelineID is the unique identifier for this pipeline 82 87 run from the knot. ··· 110 115 container entirely. 111 116 type: boolean 112 117 workflows: 113 - description: Workflows is the list of workflows to execute in 114 - this pipeline. 118 + description: |- 119 + Workflows is the list of workflows to execute in this pipeline. 120 + For multi-arch workflows, this contains one entry per matrix leg plus an optional final entry. 115 121 items: 116 122 description: |- 117 123 WorkflowSpec defines a workflow to execute as part of a pipeline. 118 124 This is the canonical workflow definition that matches the .tangled/workflows/*.yaml format. 125 + For multi-arch workflows, the engine expands the matrix and creates one WorkflowSpec per leg. 126 + Each leg has a single Image and Architecture; the matrix metadata lives in PipelineRunSpec. 119 127 properties: 120 128 architecture: 121 129 description: Architecture is the target architecture for ··· 141 149 description: Environment contains workflow-level environment 142 150 variables. 143 151 type: object 152 + final: 153 + description: |- 154 + Final defines steps that run once after all matrix legs complete. 155 + Only valid on multi-arch workflows. The engine sets this on the dedicated final WorkflowSpec. 156 + properties: 157 + architecture: 158 + description: Architecture is the target architecture 159 + for the final steps. 160 + enum: 161 + - amd64 162 + - arm64 163 + type: string 164 + image: 165 + description: |- 166 + Image is the container image for the final steps. 167 + If empty, uses the first image from the matrix. 
168 + type: string 169 + steps: 170 + description: Steps is the ordered list of steps to execute 171 + after all matrix legs complete. 172 + items: 173 + description: WorkflowStep defines a single step in 174 + a workflow. 175 + properties: 176 + command: 177 + description: Command is the shell command to execute. 178 + type: string 179 + environment: 180 + additionalProperties: 181 + type: string 182 + description: Environment contains step-specific 183 + environment variables. 184 + type: object 185 + name: 186 + description: Name is the human-readable name of 187 + the step. 188 + type: string 189 + required: 190 + - command 191 + - name 192 + type: object 193 + minItems: 1 194 + type: array 195 + required: 196 + - architecture 197 + - steps 198 + type: object 144 199 image: 145 200 description: Image is the container image to use for executing 146 201 the workflow steps. 147 202 type: string 203 + isFinal: 204 + description: IsFinal indicates this WorkflowSpec represents 205 + the final step of a multi-arch workflow. 206 + type: boolean 207 + isMatrixLeg: 208 + description: IsMatrixLeg indicates this WorkflowSpec was 209 + generated from a matrix expansion. 210 + type: boolean 148 211 name: 149 212 description: Name is the workflow filename (e.g., "workflow-amd64.yaml"). 150 213 type: string ··· 1360 1423 description: CompletionTime is when the workflow finished. 1361 1424 format: date-time 1362 1425 type: string 1426 + finalJobName: 1427 + description: FinalJobName is the name of the final Job (for 1428 + multi-arch workflows). 1429 + type: string 1430 + finalPhase: 1431 + description: FinalPhase is the phase of the final Job. 1432 + type: string 1363 1433 jobName: 1364 - description: JobName is the name of the Kubernetes Job created 1365 - for this workflow. 1434 + description: |- 1435 + JobName is the name of the Kubernetes Job created for this workflow. 1436 + For multi-arch workflows, this is empty; use MatrixLegStatuses instead. 
1366 1437 type: string 1438 + matrixLegStatuses: 1439 + description: MatrixLegStatuses tracks per-architecture Job statuses 1440 + for multi-arch workflows. 1441 + items: 1442 + description: MatrixLegStatus tracks the status of a single 1443 + matrix leg Job. 1444 + properties: 1445 + architecture: 1446 + description: Architecture is the target architecture for 1447 + this leg. 1448 + type: string 1449 + image: 1450 + description: Image is the container image used for this 1451 + leg. 1452 + type: string 1453 + jobName: 1454 + description: JobName is the name of the Kubernetes Job 1455 + for this leg. 1456 + type: string 1457 + phase: 1458 + description: Phase is the current phase (Pending, Running, 1459 + Succeeded, Failed). 1460 + type: string 1461 + required: 1462 + - architecture 1463 + type: object 1464 + type: array 1367 1465 name: 1368 1466 description: Name is the workflow name. 1369 1467 type: string
+1 -1
config/gateway/httproute.yaml
··· 11 11 - loom.jarrett.net 12 12 rules: 13 13 - backendRefs: 14 - - name: loom-loom-spindle-service 14 + - name: loom-spindle-service 15 15 port: 6555
+20
config/manager/grpc_service.yaml
··· 1 + --- 2 + apiVersion: v1 3 + kind: Service 4 + metadata: 5 + name: controller-manager-grpc 6 + namespace: system 7 + labels: 8 + app.kubernetes.io/name: loom 9 + app.kubernetes.io/component: grpc 10 + app.kubernetes.io/managed-by: kustomize 11 + spec: 12 + selector: 13 + control-plane: controller-manager 14 + app.kubernetes.io/name: loom 15 + ports: 16 + - name: grpc 17 + port: 9090 18 + protocol: TCP 19 + targetPort: 9090 20 + type: ClusterIP
+2 -1
config/manager/kustomization.yaml
··· 1 1 resources: 2 2 - manager.yaml 3 3 - service.yaml 4 + - grpc_service.yaml 4 5 - pvc.yaml 5 6 - loom-config.yaml 6 7 apiVersion: kustomize.config.k8s.io/v1beta1 7 8 kind: Kustomization 8 9 images: 9 10 - name: controller 10 - newName: atcr.io/evan.jarrett.net/loom 11 + newName: buoy.cr/evan.jarrett.net/loom 11 12 newTag: latest
+1 -1
config/manager/loom-config.yaml
··· 1 1 apiVersion: v1 2 2 kind: ConfigMap 3 3 metadata: 4 - name: loom-config 4 + name: config 5 5 namespace: system 6 6 data: 7 7 config.yaml: |
+6 -2
config/manager/manager.yaml
··· 66 66 - /manager 67 67 args: 68 68 - --health-probe-bind-address=:8081 69 - image: atcr.io/evan.jarrett.net/loom:latest 69 + image: buoy.cr/evan.jarrett.net/loom:latest 70 70 imagePullPolicy: Always 71 71 name: manager 72 72 env: ··· 75 75 fieldRef: 76 76 fieldPath: metadata.namespace 77 77 - name: LOOM_IMAGE 78 - value: "atcr.io/evan.jarrett.net/loom:latest" 78 + value: "buoy.cr/evan.jarrett.net/loom:latest" 79 79 - name: SPINDLE_SERVER_HOSTNAME 80 80 value: "loom.jarrett.net" 81 81 - name: SPINDLE_SERVER_OWNER ··· 119 119 - name: loom-config 120 120 mountPath: /etc/loom 121 121 readOnly: true 122 + - name: scratch 123 + mountPath: /scratch 122 124 volumes: 123 125 - name: spindle-logs 124 126 persistentVolumeClaim: ··· 129 131 - name: loom-config 130 132 configMap: 131 133 name: loom-config 134 + - name: scratch 135 + emptyDir: {} 132 136 serviceAccountName: controller-manager 133 137 terminationGracePeriodSeconds: 10
+1 -1
config/manager/service.yaml
··· 2 2 apiVersion: v1 3 3 kind: Service 4 4 metadata: 5 - name: loom-spindle-service 5 + name: spindle-service 6 6 namespace: system 7 7 labels: 8 8 app.kubernetes.io/name: loom
+6 -7
go.mod
··· 5 5 require ( 6 6 github.com/cenkalti/backoff/v4 v4.3.0 7 7 github.com/cyphar/filepath-securejoin v0.6.1 8 + github.com/go-logr/logr v1.4.3 8 9 github.com/onsi/ginkgo/v2 v2.28.1 9 - github.com/onsi/gomega v1.39.0 10 + github.com/onsi/gomega v1.39.1 11 + google.golang.org/grpc v1.80.0 12 + google.golang.org/protobuf v1.36.11 10 13 gopkg.in/yaml.v3 v3.0.1 11 14 k8s.io/api v0.35.3 12 15 k8s.io/apimachinery v0.35.3 ··· 17 20 18 21 require ( 19 22 cel.dev/expr v0.25.1 // indirect 20 - github.com/Blank-Xu/sql-adapter v1.2.1 // indirect 23 + github.com/Blank-Xu/sql-adapter v1.1.1 // indirect 21 24 github.com/Masterminds/semver/v3 v3.4.0 // indirect 22 25 github.com/Microsoft/go-winio v0.6.2 // indirect 23 26 github.com/antlr4-go/antlr/v4 v4.13.1 // indirect ··· 48 51 github.com/bluesky-social/jetstream v0.0.0-20260226214936-e0274250f654 // indirect 49 52 github.com/bmatcuk/doublestar/v4 v4.10.0 // indirect 50 53 github.com/carlmjohnson/versioninfo v0.22.5 // indirect 51 - github.com/casbin/casbin/v2 v2.135.0 // indirect 52 - github.com/casbin/casbin/v3 v3.10.0 // indirect 54 + github.com/casbin/casbin/v2 v2.103.0 // indirect 53 55 github.com/casbin/govaluate v1.10.0 // indirect 54 56 github.com/cenkalti/backoff/v5 v5.0.3 // indirect 55 57 github.com/cespare/xxhash/v2 v2.3.0 // indirect ··· 79 81 github.com/go-git/go-git/v5 v5.17.2 // indirect 80 82 github.com/go-jose/go-jose/v4 v4.1.4 // indirect 81 83 github.com/go-logfmt/logfmt v0.6.1 // indirect 82 - github.com/go-logr/logr v1.4.3 // indirect 83 84 github.com/go-logr/stdr v1.2.2 // indirect 84 85 github.com/go-logr/zapr v1.3.0 // indirect 85 86 github.com/go-openapi/jsonpointer v0.22.5 // indirect ··· 211 212 gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect 212 213 google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect 213 214 google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect 214 - google.golang.org/grpc v1.80.0 // indirect 215 - 
google.golang.org/protobuf v1.36.11 // indirect 216 215 gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect 217 216 gopkg.in/fsnotify.v1 v1.4.7 // indirect 218 217 gopkg.in/inf.v0 v0.9.1 // indirect
+9 -9
go.sum
··· 41 41 dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= 42 42 github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= 43 43 github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= 44 - github.com/Blank-Xu/sql-adapter v1.2.1 h1:Gl9CZI3PCDLg2EKvmYFbieOe95IRJMuruj7AL9JXsLk= 45 - github.com/Blank-Xu/sql-adapter v1.2.1/go.mod h1:Duskd1ORzVkmxOxk6i6HSAdmASjqVhg9fcAefibnrns= 44 + github.com/Blank-Xu/sql-adapter v1.1.1 h1:+g7QXU9sl/qT6Po97teMpf3GjAO0X9aFaqgSePXvYko= 45 + github.com/Blank-Xu/sql-adapter v1.1.1/go.mod h1:o2g8EZhZ3TudnYEGDkoU+3jCTCgDgx1o/Ig5ajKkaLY= 46 46 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 47 47 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= 48 48 github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= ··· 112 112 github.com/bluesky-social/jetstream v0.0.0-20260226214936-e0274250f654 h1:OK76FcHhZp8ohjRB0OMWgti0oYAWFlt3KDQcIkH1pfI= 113 113 github.com/bluesky-social/jetstream v0.0.0-20260226214936-e0274250f654/go.mod h1:vt8kVRKtvrBspt9G38wDD8+BotjIMO8u8IYoVnyE4zY= 114 114 github.com/bmatcuk/doublestar/v4 v4.6.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= 115 - github.com/bmatcuk/doublestar/v4 v4.9.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= 115 + github.com/bmatcuk/doublestar/v4 v4.7.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= 116 116 github.com/bmatcuk/doublestar/v4 v4.10.0 h1:zU9WiOla1YA122oLM6i4EXvGW62DvKZVxIe6TYWexEs= 117 117 github.com/bmatcuk/doublestar/v4 v4.10.0/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= 118 118 github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= ··· 121 121 github.com/bsm/gomega v1.27.10/go.mod 
h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= 122 122 github.com/carlmjohnson/versioninfo v0.22.5 h1:O00sjOLUAFxYQjlN/bzYTuZiS0y6fWDQjMRvwtKgwwc= 123 123 github.com/carlmjohnson/versioninfo v0.22.5/go.mod h1:QT9mph3wcVfISUKd0i9sZfVrPviHuSF+cUtLjm2WSf8= 124 - github.com/casbin/casbin/v2 v2.135.0 h1:6BLkMQiGotYyS5yYeWgW19vxqugUlvHFkFiLnLR/bxk= 125 - github.com/casbin/casbin/v2 v2.135.0/go.mod h1:FmcfntdXLTcYXv/hxgNntcRPqAbwOG9xsism0yXT+18= 126 - github.com/casbin/casbin/v3 v3.10.0 h1:039ORla55vCeIZWd0LfzWFt1yiEA5X4W41xBW2bQuHs= 127 - github.com/casbin/casbin/v3 v3.10.0/go.mod h1:5rJbQr2e6AuuDDNxnPc5lQlC9nIgg6nS1zYwKXhpHC8= 124 + github.com/casbin/casbin/v2 v2.100.0/go.mod h1:LO7YPez4dX3LgoTCqSQAleQDo0S0BeZBDxYnPUl95Ng= 125 + github.com/casbin/casbin/v2 v2.103.0 h1:dHElatNXNrr8XcseUov0ZSiWjauwmZZE6YMV3eU1yic= 126 + github.com/casbin/casbin/v2 v2.103.0/go.mod h1:Ee33aqGrmES+GNL17L0h9X28wXuo829wnNUnS0edAco= 127 + github.com/casbin/govaluate v1.2.0/go.mod h1:G/UnbIjZk/0uMNaLwZZmFQrR72tYRZWQkO70si/iR7A= 128 128 github.com/casbin/govaluate v1.3.0/go.mod h1:G/UnbIjZk/0uMNaLwZZmFQrR72tYRZWQkO70si/iR7A= 129 129 github.com/casbin/govaluate v1.10.0 h1:ffGw51/hYH3w3rZcxO/KcaUIDOLP84w7nsidMVgaDG0= 130 130 github.com/casbin/govaluate v1.10.0/go.mod h1:G/UnbIjZk/0uMNaLwZZmFQrR72tYRZWQkO70si/iR7A= ··· 584 584 github.com/onsi/gomega v1.24.0/go.mod h1:Z/NWtiqwBrwUt4/2loMmHL63EDLnYHmVbuBpDr2vQAg= 585 585 github.com/onsi/gomega v1.24.1/go.mod h1:3AOiACssS3/MajrniINInwbfOOtfZvplPzuRSmvt1jM= 586 586 github.com/onsi/gomega v1.25.0/go.mod h1:r+zV744Re+DiYCIPRlYOTxn0YkOLcAnW8k1xXdMPGhM= 587 - github.com/onsi/gomega v1.39.0 h1:y2ROC3hKFmQZJNFeGAMeHZKkjBL65mIZcvrLQBF9k6Q= 588 - github.com/onsi/gomega v1.39.0/go.mod h1:ZCU1pkQcXDO5Sl9/VVEGlDyp+zm0m1cmeG5TOzLgdh4= 587 + github.com/onsi/gomega v1.39.1 h1:1IJLAad4zjPn2PsnhH70V4DKRFlrCzGBNrNaru+Vf28= 588 + github.com/onsi/gomega v1.39.1/go.mod h1:hL6yVALoTOxeWudERyfppUcZXjMwIMLnuSfruD2lcfg= 589 589 github.com/openbao/openbao/api/v2 v2.5.1 
h1:Br79D6L20SbAa5P7xqENxmvv8LyI4HoKosPy7klhn4o= 590 590 github.com/openbao/openbao/api/v2 v2.5.1/go.mod h1:Dh5un77tqGgMbmlVEqjqN+8/dMyUohnkaQVg/wXW0Ig= 591 591 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
+2 -2
helm/loom/Chart.yaml
··· 2 2 name: loom 3 3 description: A Kubernetes operator that runs CI/CD pipelines from tangled.org 4 4 type: application 5 - version: 0.0.1 6 - appVersion: "0.0.1" 5 + version: 0.1.5 6 + appVersion: "0.1.5" 7 7 home: https://github.com/tangled-sh/loom 8 8 sources: 9 9 - https://github.com/tangled-sh/loom
+104 -5
helm/loom/crds/loom.j5t.io_spindlesets.yaml
··· 77 77 items: 78 78 type: string 79 79 type: array 80 + multiArch: 81 + description: |- 82 + MultiArch indicates this pipeline run contains multi-arch workflows. 83 + When true, the controller creates per-architecture Jobs and gates the final Job. 84 + type: boolean 80 85 pipelineID: 81 86 description: PipelineID is the unique identifier for this pipeline 82 87 run from the knot. ··· 110 115 container entirely. 111 116 type: boolean 112 117 workflows: 113 - description: Workflows is the list of workflows to execute in 114 - this pipeline. 118 + description: |- 119 + Workflows is the list of workflows to execute in this pipeline. 120 + For multi-arch workflows, this contains one entry per matrix leg plus an optional final entry. 115 121 items: 116 122 description: |- 117 123 WorkflowSpec defines a workflow to execute as part of a pipeline. 118 124 This is the canonical workflow definition that matches the .tangled/workflows/*.yaml format. 125 + For multi-arch workflows, the engine expands the matrix and creates one WorkflowSpec per leg. 126 + Each leg has a single Image and Architecture; the matrix metadata lives in PipelineRunSpec. 119 127 properties: 120 128 architecture: 121 129 description: Architecture is the target architecture for ··· 141 149 description: Environment contains workflow-level environment 142 150 variables. 143 151 type: object 152 + final: 153 + description: |- 154 + Final defines steps that run once after all matrix legs complete. 155 + Only valid on multi-arch workflows. The engine sets this on the dedicated final WorkflowSpec. 156 + properties: 157 + architecture: 158 + description: Architecture is the target architecture 159 + for the final steps. 160 + enum: 161 + - amd64 162 + - arm64 163 + type: string 164 + image: 165 + description: |- 166 + Image is the container image for the final steps. 167 + If empty, uses the first image from the matrix. 
168 + type: string 169 + steps: 170 + description: Steps is the ordered list of steps to execute 171 + after all matrix legs complete. 172 + items: 173 + description: WorkflowStep defines a single step in 174 + a workflow. 175 + properties: 176 + command: 177 + description: Command is the shell command to execute. 178 + type: string 179 + environment: 180 + additionalProperties: 181 + type: string 182 + description: Environment contains step-specific 183 + environment variables. 184 + type: object 185 + name: 186 + description: Name is the human-readable name of 187 + the step. 188 + type: string 189 + required: 190 + - command 191 + - name 192 + type: object 193 + minItems: 1 194 + type: array 195 + required: 196 + - architecture 197 + - steps 198 + type: object 144 199 image: 145 200 description: Image is the container image to use for executing 146 201 the workflow steps. 147 202 type: string 203 + isFinal: 204 + description: IsFinal indicates this WorkflowSpec represents 205 + the final step of a multi-arch workflow. 206 + type: boolean 207 + isMatrixLeg: 208 + description: IsMatrixLeg indicates this WorkflowSpec was 209 + generated from a matrix expansion. 210 + type: boolean 148 211 name: 149 212 description: Name is the workflow filename (e.g., "workflow-amd64.yaml"). 150 213 type: string ··· 1240 1303 operator: 1241 1304 description: |- 1242 1305 Operator represents a key's relationship to the value. 1243 - Valid operators are Exists and Equal. Defaults to Equal. 1306 + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. 1244 1307 Exists is equivalent to wildcard for value, so that a pod can 1245 1308 tolerate all taints of a particular category. 1309 + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). 1246 1310 type: string 1247 1311 tolerationSeconds: 1248 1312 description: |- ··· 1359 1423 description: CompletionTime is when the workflow finished. 
1360 1424 format: date-time 1361 1425 type: string 1426 + finalJobName: 1427 + description: FinalJobName is the name of the final Job (for 1428 + multi-arch workflows). 1429 + type: string 1430 + finalPhase: 1431 + description: FinalPhase is the phase of the final Job. 1432 + type: string 1362 1433 jobName: 1363 - description: JobName is the name of the Kubernetes Job created 1364 - for this workflow. 1434 + description: |- 1435 + JobName is the name of the Kubernetes Job created for this workflow. 1436 + For multi-arch workflows, this is empty; use MatrixLegStatuses instead. 1365 1437 type: string 1438 + matrixLegStatuses: 1439 + description: MatrixLegStatuses tracks per-architecture Job statuses 1440 + for multi-arch workflows. 1441 + items: 1442 + description: MatrixLegStatus tracks the status of a single 1443 + matrix leg Job. 1444 + properties: 1445 + architecture: 1446 + description: Architecture is the target architecture for 1447 + this leg. 1448 + type: string 1449 + image: 1450 + description: Image is the container image used for this 1451 + leg. 1452 + type: string 1453 + jobName: 1454 + description: JobName is the name of the Kubernetes Job 1455 + for this leg. 1456 + type: string 1457 + phase: 1458 + description: Phase is the current phase (Pending, Running, 1459 + Succeeded, Failed). 1460 + type: string 1461 + required: 1462 + - architecture 1463 + type: object 1464 + type: array 1366 1465 name: 1367 1466 description: Name is the workflow name. 1368 1467 type: string
+17
helm/loom/templates/grpc_service.yaml
··· 1 + apiVersion: v1 2 + kind: Service 3 + metadata: 4 + name: {{ include "loom.fullname" . }}-grpc 5 + namespace: {{ .Release.Namespace }} 6 + labels: 7 + {{- include "loom.labels" . | nindent 4 }} 8 + app.kubernetes.io/component: grpc 9 + spec: 10 + type: ClusterIP 11 + selector: 12 + {{- include "loom.controllerLabels" . | nindent 4 }} 13 + ports: 14 + - name: grpc 15 + port: {{ .Values.grpc.port | default 9090 }} 16 + protocol: TCP 17 + targetPort: {{ .Values.grpc.port | default 9090 }}
+5 -1
helm/loom/values.yaml
··· 2 2 3 3 # Image configuration 4 4 image: 5 - repository: atcr.io/evan.jarrett.net/loom 5 + repository: buoy.cr/evan.jarrett.net/loom 6 6 pullPolicy: Always 7 7 # Overrides the image tag whose default is the chart appVersion. 8 8 tag: "" ··· 104 104 service: 105 105 type: ClusterIP 106 106 port: 6555 107 + 108 + # gRPC service for runner communication 109 + grpc: 110 + port: 9090 107 111 108 112 # RBAC configuration 109 113 rbac:
+190 -69
internal/controller/spindleset_controller.go
··· 24 24 "time" 25 25 26 26 "github.com/cenkalti/backoff/v4" 27 + "github.com/go-logr/logr" 27 28 "tangled.org/core/spindle" 28 29 "tangled.org/core/spindle/models" 29 30 ··· 33 34 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 34 35 "k8s.io/apimachinery/pkg/runtime" 35 36 "k8s.io/client-go/rest" 37 + "k8s.io/client-go/util/retry" 36 38 ctrl "sigs.k8s.io/controller-runtime" 37 39 "sigs.k8s.io/controller-runtime/pkg/client" 38 40 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ··· 57 59 // LoomImage is the loom image containing the runner binary 58 60 // Set from LOOM_IMAGE environment variable 59 61 LoomImage string 62 + 63 + // OperatorAddr is the gRPC address of the operator for runner communication 64 + OperatorAddr string 60 65 61 66 // Track watched Jobs for status reporting 62 67 watchedJobs sync.Map // map[string]models.WorkflowId ··· 175 180 logger.Error(err, "Failed to monitor job statuses") 176 181 } 177 182 183 + // For multi-arch pipelines, create final Jobs once all matrix legs succeed 184 + if err := r.ensureFinalJobs(ctx, spindleSet); err != nil { 185 + logger.Error(err, "Failed to ensure final Jobs") 186 + } 187 + 178 188 if jobsErr != nil { 179 189 return ctrl.Result{}, jobsErr 180 190 } ··· 264 274 return backoff.Retry(operation, backoff.WithContext(bo, ctx)) 265 275 } 266 276 267 - // updateStatus updates the SpindleSet status based on current Jobs 277 + // updateStatus updates the SpindleSet status based on current Jobs. 278 + // Status updates retry on conflict to handle races with concurrent reconciles 279 + // (e.g., triggered by Job creation/updates via Owns). 
268 280 func (r *SpindleSetReconciler) updateStatus(ctx context.Context, spindleSet *loomv1alpha1.SpindleSet) error { 269 281 logger := log.FromContext(ctx) 270 282 271 - // Re-fetch the SpindleSet to get the latest version before updating status 272 - // This avoids optimistic concurrency conflicts when the object was modified 273 - // by another reconciliation loop (e.g., triggered by Job creation/updates) 274 - latestSpindleSet := &loomv1alpha1.SpindleSet{} 275 - if err := r.Get(ctx, client.ObjectKeyFromObject(spindleSet), latestSpindleSet); err != nil { 276 - return fmt.Errorf("failed to fetch latest SpindleSet: %w", err) 277 - } 278 - // Use the latest version for all subsequent operations 279 - spindleSet = latestSpindleSet 283 + return retry.RetryOnConflict(retry.DefaultRetry, func() error { 284 + // Re-fetch the SpindleSet on each attempt to get the latest version 285 + latestSpindleSet := &loomv1alpha1.SpindleSet{} 286 + if err := r.Get(ctx, client.ObjectKeyFromObject(spindleSet), latestSpindleSet); err != nil { 287 + return fmt.Errorf("failed to fetch latest SpindleSet: %w", err) 288 + } 289 + return r.computeAndApplyStatus(ctx, logger, latestSpindleSet) 290 + }) 291 + } 280 292 293 + // computeAndApplyStatus recomputes status from Jobs and applies it to spindleSet. 294 + func (r *SpindleSetReconciler) computeAndApplyStatus(ctx context.Context, logger logr.Logger, spindleSet *loomv1alpha1.SpindleSet) error { 281 295 // List all Jobs owned by this SpindleSet 282 296 jobList := &batchv1.JobList{} 283 297 if err := r.List(ctx, jobList, client.InNamespace(spindleSet.Namespace), client.MatchingLabels{ ··· 474 488 return fmt.Errorf("failed to list nodes: %w", err) 475 489 } 476 490 477 - // Convert workflow steps to jobbuilder format and create Jobs for each workflow 491 + // Convert workflow steps to jobbuilder format and create Jobs for each workflow. 492 + // For multi-arch pipelines, final Jobs are gated on all matrix leg Jobs succeeding. 
478 493 for _, workflowSpec := range pipelineRun.Workflows { 479 - // Check if Job already exists 480 - jobName := fmt.Sprintf("spindle-%s-%s", pipelineRun.PipelineID, workflowSpec.Name) 481 - if len(jobName) > 63 { 482 - jobName = jobName[:63] 494 + // For multi-arch: skip final workflows initially — they are created 495 + // by ensureFinalJobs once all matrix leg Jobs have succeeded. 496 + if workflowSpec.IsFinal { 497 + continue 483 498 } 484 499 485 - existingJob := &batchv1.Job{} 486 - err := r.Get(ctx, client.ObjectKey{ 487 - Name: jobName, 488 - Namespace: spindleSet.Namespace, 489 - }, existingJob) 500 + if err := r.createWorkflowJob(ctx, spindleSet, pipelineRun, workflowSpec, secretName, secretKeys, &nodeList); err != nil { 501 + return err 502 + } 503 + } 504 + 505 + return nil 506 + } 507 + 508 + // createWorkflowJob creates a single Kubernetes Job for a workflow spec. 509 + func (r *SpindleSetReconciler) createWorkflowJob(ctx context.Context, spindleSet *loomv1alpha1.SpindleSet, pipelineRun *loomv1alpha1.PipelineRunSpec, workflowSpec loomv1alpha1.WorkflowSpec, secretName string, secretKeys []string, nodeList *corev1.NodeList) error { 510 + logger := log.FromContext(ctx) 490 511 491 - if err == nil { 492 - // Job already exists 493 - logger.V(1).Info("Job already exists for workflow", "workflow", workflowSpec.Name, "job", jobName) 494 - continue 512 + // Check if Job already exists 513 + jobName := fmt.Sprintf("spindle-%s-%s", pipelineRun.PipelineID, workflowSpec.Name) 514 + if len(jobName) > 63 { 515 + jobName = jobName[:63] 516 + } 517 + 518 + existingJob := &batchv1.Job{} 519 + err := r.Get(ctx, client.ObjectKey{ 520 + Name: jobName, 521 + Namespace: spindleSet.Namespace, 522 + }, existingJob) 523 + 524 + if err == nil { 525 + logger.V(1).Info("Job already exists for workflow", "workflow", workflowSpec.Name, "job", jobName) 526 + return nil 527 + } 528 + 529 + if !apierrors.IsNotFound(err) { 530 + return fmt.Errorf("failed to check for existing job: 
%w", err) 531 + } 532 + 533 + // Convert workflow steps to jobbuilder format 534 + jobSteps := make([]jobbuilder.WorkflowStep, 0, len(workflowSpec.Steps)) 535 + for _, step := range workflowSpec.Steps { 536 + jobSteps = append(jobSteps, jobbuilder.WorkflowStep{ 537 + Name: step.Name, 538 + Command: step.Command, 539 + Env: step.Environment, 540 + }) 541 + } 542 + 543 + // Build Job configuration 544 + jobConfig := jobbuilder.WorkflowConfig{ 545 + WorkflowName: workflowSpec.Name, 546 + PipelineID: pipelineRun.PipelineID, 547 + SpindleSetName: spindleSet.Name, 548 + Image: workflowSpec.Image, 549 + LoomImage: r.LoomImage, 550 + Architecture: workflowSpec.Architecture, 551 + Steps: jobSteps, 552 + WorkflowSpec: workflowSpec, 553 + CloneCommands: pipelineRun.CloneCommands, 554 + SkipClone: pipelineRun.SkipClone, 555 + SecretName: secretName, 556 + SecretKeys: secretKeys, 557 + Template: spindleSet.Spec.Template, 558 + Namespace: spindleSet.Namespace, 559 + OperatorAddr: r.OperatorAddr, 560 + } 561 + 562 + // Create the Job 563 + job, err := jobbuilder.BuildJob(jobConfig, nodeList) 564 + if err != nil { 565 + return fmt.Errorf("failed to build job for workflow %s: %w", workflowSpec.Name, err) 566 + } 567 + 568 + // Set SpindleSet as owner of the Job 569 + if err := controllerutil.SetControllerReference(spindleSet, job, r.Scheme); err != nil { 570 + return fmt.Errorf("failed to set controller reference: %w", err) 571 + } 572 + 573 + logger.Info("Creating Job for workflow", "workflow", workflowSpec.Name, "job", job.Name) 574 + if err := r.retryCreate(ctx, job); err != nil { 575 + if apierrors.IsAlreadyExists(err) { 576 + logger.Info("Job already exists, skipping creation", "workflow", workflowSpec.Name, "job", job.Name) 577 + return nil 495 578 } 579 + return fmt.Errorf("failed to create job for workflow %s: %w", workflowSpec.Name, err) 580 + } 581 + 582 + logger.Info("Job created successfully", "workflow", workflowSpec.Name, "job", job.Name) 583 + return nil 584 + } 585 
+ 586 + // ensureFinalJobs creates final Jobs for multi-arch pipelines once all matrix legs have succeeded. 587 + // This is called during reconciliation after job status monitoring. 588 + func (r *SpindleSetReconciler) ensureFinalJobs(ctx context.Context, spindleSet *loomv1alpha1.SpindleSet) error { 589 + logger := log.FromContext(ctx) 590 + pipelineRun := spindleSet.Spec.PipelineRun 591 + 592 + if !pipelineRun.MultiArch { 593 + return nil 594 + } 496 595 497 - if !apierrors.IsNotFound(err) { 498 - return fmt.Errorf("failed to check for existing job: %w", err) 596 + // Find the final workflow spec 597 + var finalSpec *loomv1alpha1.WorkflowSpec 598 + for i := range pipelineRun.Workflows { 599 + if pipelineRun.Workflows[i].IsFinal { 600 + finalSpec = &pipelineRun.Workflows[i] 601 + break 499 602 } 603 + } 604 + if finalSpec == nil { 605 + return nil // No final step defined 606 + } 500 607 501 - // Convert workflow steps to jobbuilder format 502 - jobSteps := make([]jobbuilder.WorkflowStep, 0, len(workflowSpec.Steps)) 503 - for _, step := range workflowSpec.Steps { 504 - jobSteps = append(jobSteps, jobbuilder.WorkflowStep{ 505 - Name: step.Name, 506 - Command: step.Command, 507 - Env: step.Environment, 508 - }) 608 + // Check if final Job already exists 609 + finalJobName := fmt.Sprintf("spindle-%s-%s", pipelineRun.PipelineID, finalSpec.Name) 610 + if len(finalJobName) > 63 { 611 + finalJobName = finalJobName[:63] 612 + } 613 + existingJob := &batchv1.Job{} 614 + if err := r.Get(ctx, client.ObjectKey{Name: finalJobName, Namespace: spindleSet.Namespace}, existingJob); err == nil { 615 + return nil // Already created 616 + } 617 + 618 + // Check if all matrix leg Jobs have succeeded 619 + jobList := &batchv1.JobList{} 620 + if err := r.List(ctx, jobList, 621 + client.InNamespace(spindleSet.Namespace), 622 + client.MatchingLabels{"loom.j5t.io/spindleset": spindleSet.Name}, 623 + ); err != nil { 624 + return fmt.Errorf("failed to list jobs: %w", err) 625 + } 626 + 627 + 
legCount := 0 628 + succeededCount := 0 629 + for _, job := range jobList.Items { 630 + // Count only matrix leg jobs (not the final job) 631 + wfName := job.Labels["loom.j5t.io/workflow"] 632 + for _, wf := range pipelineRun.Workflows { 633 + if wf.Name == wfName && wf.IsMatrixLeg { 634 + legCount++ 635 + if job.Status.Succeeded > 0 { 636 + succeededCount++ 637 + } 638 + break 639 + } 509 640 } 641 + } 510 642 511 - // Build Job configuration 512 - jobConfig := jobbuilder.WorkflowConfig{ 513 - WorkflowName: workflowSpec.Name, 514 - PipelineID: pipelineRun.PipelineID, 515 - SpindleSetName: spindleSet.Name, 516 - Image: workflowSpec.Image, 517 - LoomImage: r.LoomImage, 518 - Architecture: workflowSpec.Architecture, 519 - Steps: jobSteps, 520 - WorkflowSpec: workflowSpec, // Pass full workflow spec to runner 521 - CloneCommands: pipelineRun.CloneCommands, 522 - SkipClone: pipelineRun.SkipClone, 523 - SecretName: secretName, // Name of K8s Secret to inject (empty if no secrets) 524 - SecretKeys: secretKeys, // Secret env var names for log masking 525 - Template: spindleSet.Spec.Template, 526 - Namespace: spindleSet.Namespace, 643 + // Count expected legs 644 + expectedLegs := 0 645 + for _, wf := range pipelineRun.Workflows { 646 + if wf.IsMatrixLeg { 647 + expectedLegs++ 527 648 } 649 + } 528 650 529 - // Create the Job 530 - job, err := jobbuilder.BuildJob(jobConfig, &nodeList) 531 - if err != nil { 532 - return fmt.Errorf("failed to build job for workflow %s: %w", workflowSpec.Name, err) 533 - } 651 + if legCount < expectedLegs || succeededCount < expectedLegs { 652 + logger.V(1).Info("Waiting for matrix legs to complete", 653 + "expected", expectedLegs, "found", legCount, "succeeded", succeededCount) 654 + return nil // Not all legs have completed yet 655 + } 534 656 535 - // Set SpindleSet as owner of the Job 536 - if err := controllerutil.SetControllerReference(spindleSet, job, r.Scheme); err != nil { 537 - return fmt.Errorf("failed to set controller reference: 
%w", err) 538 - } 657 + logger.Info("All matrix legs succeeded, creating final Job", "legs", expectedLegs) 539 658 540 - logger.Info("Creating Job for workflow", "workflow", workflowSpec.Name, "job", job.Name) 541 - if err := r.retryCreate(ctx, job); err != nil { 542 - if apierrors.IsAlreadyExists(err) { 543 - // Job already exists (possibly from previous deployment), skip 544 - logger.Info("Job already exists, skipping creation", "workflow", workflowSpec.Name, "job", job.Name) 545 - continue 546 - } 547 - return fmt.Errorf("failed to create job for workflow %s: %w", workflowSpec.Name, err) 659 + // Build secret info 660 + secretName := "" 661 + var secretKeys []string 662 + if len(pipelineRun.Secrets) > 0 { 663 + secretName = fmt.Sprintf("%s-secrets", spindleSet.Name) 664 + for _, s := range pipelineRun.Secrets { 665 + secretKeys = append(secretKeys, s.Key) 548 666 } 667 + } 549 668 550 - logger.Info("Job created successfully", "workflow", workflowSpec.Name, "job", job.Name) 669 + var nodeList corev1.NodeList 670 + if err := r.List(ctx, &nodeList); err != nil { 671 + return fmt.Errorf("failed to list nodes: %w", err) 551 672 } 552 673 553 - return nil 674 + return r.createWorkflowJob(ctx, spindleSet, pipelineRun, *finalSpec, secretName, secretKeys, &nodeList) 554 675 } 555 676 556 677 // cleanupOrphanedJobs cleans up Jobs without a matching SpindleSet
+419 -274
internal/engine/kubernetes_engine.go
··· 1 1 package engine 2 2 3 3 import ( 4 - "bufio" 5 4 "context" 6 - "encoding/json" 7 5 "fmt" 8 - "io" 9 6 "maps" 10 7 "strings" 11 - "sync" 12 8 "time" 13 9 14 10 securejoin "github.com/cyphar/filepath-securejoin" 15 11 "gopkg.in/yaml.v3" 16 - batchv1 "k8s.io/api/batch/v1" 17 - corev1 "k8s.io/api/core/v1" 18 12 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 19 - "k8s.io/client-go/kubernetes" 20 13 "k8s.io/client-go/rest" 21 14 "sigs.k8s.io/controller-runtime/pkg/client" 22 15 "sigs.k8s.io/controller-runtime/pkg/log" ··· 26 19 "tangled.org/core/spindle/secrets" 27 20 28 21 loomv1alpha1 "tangled.org/evan.jarrett.net/loom/api/v1alpha1" 22 + loomgrpc "tangled.org/evan.jarrett.net/loom/internal/grpc" 29 23 ) 30 24 31 - // workflowLogStream holds the state for streaming logs from a workflow's pod 32 - type workflowLogStream struct { 33 - scanner *bufio.Scanner 34 - stream io.ReadCloser 35 - pod *corev1.Pod 36 - podPhase corev1.PodPhase // Track pod phase at stream creation time 25 + // syntheticStep is a minimal implementation of models.Step used to emit 26 + // ControlWriter entries for matrix-leg user steps (which are invisible to 27 + // the upstream spindle framework because loom wraps them in synthetic 28 + // "Matrix build" / "Final" framework steps). 29 + type syntheticStep struct { 30 + name string 31 + command string 32 + kind models.StepKind 37 33 } 38 34 39 - // extendedLogLine extends models.LogLine with exit code for error reporting 40 - type extendedLogLine struct { 41 - models.LogLine 42 - ExitCode int `json:"exit_code,omitempty"` 35 + func (s syntheticStep) Name() string { return s.name } 36 + func (s syntheticStep) Command() string { return s.command } 37 + func (s syntheticStep) Kind() models.StepKind { return s.kind } 38 + 39 + // matrixLegLogStepID returns a collision-free log step id for a matrix leg's 40 + // user step. 
The offset pushes us past the two framework steps (0, 1) that 41 + // the upstream engine already emits ("Matrix build", "Final"). 42 + func matrixLegLogStepID(legIdx, userStepIdx int) int { 43 + return 1000 + legIdx*100 + userStepIdx 44 + } 45 + 46 + // finalLogStepID returns a collision-free log step id for a final-phase step, 47 + // placed after the matrix-leg id range. 48 + func finalLogStepID(numLegs, stepIdx int) int { 49 + return 1000 + (numLegs+1)*100 + stepIdx 43 50 } 44 51 45 52 // KubernetesEngine implements the spindle Engine interface for Kubernetes Jobs. ··· 49 56 namespace string 50 57 template loomv1alpha1.SpindleTemplate 51 58 vault secrets.Manager 59 + hub *loomgrpc.Hub 60 + artifacts *loomgrpc.ArtifactStore 52 61 53 62 // Track created SpindleSets for cleanup 54 63 spindleSets map[string]*loomv1alpha1.SpindleSet 55 - 56 - // Active log streams per workflow - persist across RunStep calls 57 - logStreams map[string]*workflowLogStream 58 - streamMutex sync.RWMutex 59 64 } 60 65 61 66 // NewKubernetesEngine creates a new Kubernetes-based spindle engine. 62 - func NewKubernetesEngine(k8sClient client.Client, config *rest.Config, namespace string, template loomv1alpha1.SpindleTemplate, vault secrets.Manager) *KubernetesEngine { 67 + func NewKubernetesEngine(k8sClient client.Client, config *rest.Config, namespace string, template loomv1alpha1.SpindleTemplate, vault secrets.Manager, hub *loomgrpc.Hub, artifacts *loomgrpc.ArtifactStore) *KubernetesEngine { 63 68 return &KubernetesEngine{ 64 69 client: k8sClient, 65 70 config: config, 66 71 namespace: namespace, 67 72 template: template, 68 73 vault: vault, 74 + hub: hub, 75 + artifacts: artifacts, 69 76 spindleSets: make(map[string]*loomv1alpha1.SpindleSet), 70 - logStreams: make(map[string]*workflowLogStream), 71 77 } 72 78 } 73 79 80 + // StringOrSlice is a YAML type that accepts either a single string or an array of strings. 
81 + type StringOrSlice []string 82 + 83 + func (s *StringOrSlice) UnmarshalYAML(node *yaml.Node) error { 84 + if node.Kind == yaml.ScalarNode { 85 + *s = []string{node.Value} 86 + return nil 87 + } 88 + var slice []string 89 + if err := node.Decode(&slice); err != nil { 90 + return err 91 + } 92 + *s = slice 93 + return nil 94 + } 95 + 96 + // rawWorkflowSpec is used for initial YAML parsing before matrix expansion. 97 + // It handles the polymorphic image/architecture fields. 98 + type rawWorkflowSpec struct { 99 + Image StringOrSlice `yaml:"image"` 100 + Architecture StringOrSlice `yaml:"architecture"` 101 + Steps []loomv1alpha1.WorkflowStep `yaml:"steps"` 102 + When []loomv1alpha1.WorkflowWhen `yaml:"when"` 103 + Environment map[string]string `yaml:"environment"` 104 + Dependencies *loomv1alpha1.WorkflowDependencies `yaml:"dependencies"` 105 + Final *loomv1alpha1.FinalSpec `yaml:"final"` 106 + } 107 + 108 + // MatrixLeg represents a single combination of image and architecture. 109 + type MatrixLeg struct { 110 + Image string 111 + Architecture string 112 + } 113 + 114 + // expandMatrix computes the cartesian product of images and architectures. 115 + func expandMatrix(images, architectures StringOrSlice) []MatrixLeg { 116 + var legs []MatrixLeg 117 + for _, img := range images { 118 + for _, arch := range architectures { 119 + legs = append(legs, MatrixLeg{Image: img, Architecture: arch}) 120 + } 121 + } 122 + return legs 123 + } 124 + 74 125 // kubernetesWorkflowData holds pre-computed data for workflow execution. 75 126 // Built in InitWorkflow, consumed in SetupWorkflow. 76 127 type kubernetesWorkflowData struct { 77 128 Spec loomv1alpha1.WorkflowSpec 78 129 CloneStep models.CloneStep // empty if clone should be skipped 130 + 131 + // Multi-arch fields 132 + IsMultiArch bool 133 + MatrixLegs []MatrixLeg 134 + FinalSpec *loomv1alpha1.FinalSpec 79 135 } 80 136 81 137 // SimpleStep implements the models.Step interface. 
··· 100 156 // InitWorkflow parses the workflow YAML and initializes a Workflow model. 101 157 // Pipeline environment variables (TANGLED_*) are injected into workflow.Environment 102 158 // by the framework after this method returns. 159 + // 160 + // For multi-arch workflows (image or architecture is an array), the matrix is expanded 161 + // and stored in kubernetesWorkflowData. The framework sees synthetic steps: 162 + // - Step 0: "Matrix build" (engine fans out to N parallel Jobs, waits for all) 163 + // - Step 1: "Final" (engine creates final Job, waits for completion) — only if final block exists 103 164 func (e *KubernetesEngine) InitWorkflow(twf tangled.Pipeline_Workflow, tpl tangled.Pipeline) (*models.Workflow, error) { 104 - // Parse the Raw YAML into the unified WorkflowSpec type 105 - var spec loomv1alpha1.WorkflowSpec 106 - if err := yaml.Unmarshal([]byte(twf.Raw), &spec); err != nil { 165 + // Parse YAML with polymorphic image/architecture handling 166 + var raw rawWorkflowSpec 167 + if err := yaml.Unmarshal([]byte(twf.Raw), &raw); err != nil { 107 168 return nil, fmt.Errorf("failed to parse workflow YAML: %w", err) 108 169 } 109 170 110 - // Set the workflow name from the tangled workflow 111 - spec.Name = twf.Name 171 + if len(raw.Image) == 0 { 172 + return nil, fmt.Errorf("workflow must specify an 'image' field") 173 + } 174 + if len(raw.Architecture) == 0 { 175 + raw.Architecture = StringOrSlice{"amd64"} 176 + } 177 + 178 + // Build clone step 179 + var cloneStep models.CloneStep 180 + if twf.Clone == nil || !twf.Clone.Skip { 181 + cloneStep = models.BuildCloneStep(twf, *tpl.TriggerMetadata, false) 182 + } 183 + 184 + // Determine if this is a multi-arch workflow 185 + legs := expandMatrix(raw.Image, raw.Architecture) 186 + isMultiArch := len(legs) > 1 || raw.Final != nil 112 187 113 - // Validate required fields 114 - if spec.Image == "" { 115 - return nil, fmt.Errorf("workflow must specify an 'image' field") 188 + if isMultiArch { 189 + return 
e.initMultiArchWorkflow(twf.Name, raw, legs, cloneStep) 116 190 } 117 191 118 - // Default architecture to amd64 if not specified 119 - if spec.Architecture == "" { 120 - spec.Architecture = "amd64" 192 + // Single-arch workflow: existing behavior 193 + spec := loomv1alpha1.WorkflowSpec{ 194 + Name: twf.Name, 195 + Image: raw.Image[0], 196 + Architecture: raw.Architecture[0], 197 + Steps: raw.Steps, 198 + When: raw.When, 199 + Environment: raw.Environment, 200 + Dependencies: raw.Dependencies, 121 201 } 122 202 123 - // Convert steps to models.Step interface 124 203 modelSteps := make([]models.Step, 0, len(spec.Steps)) 125 204 for _, stepSpec := range spec.Steps { 126 205 modelSteps = append(modelSteps, SimpleStep{ ··· 130 209 }) 131 210 } 132 211 133 - // Build clone step (uses upstream models.BuildCloneStep which is self-contained) 134 - var cloneStep models.CloneStep 135 - devMode := false // TODO: Make this configurable 136 - 137 - if twf.Clone == nil || !twf.Clone.Skip { 138 - cloneStep = models.BuildCloneStep(twf, *tpl.TriggerMetadata, devMode) 139 - } 140 - 141 - // Store pre-computed workflow data 142 212 workflowData := &kubernetesWorkflowData{ 143 213 Spec: spec, 144 214 CloneStep: cloneStep, 145 215 } 146 216 147 - // Set engine-specific environment variables on the workflow 148 - // These will be merged with pipeline env vars by the framework 149 217 workflowEnv := map[string]string{ 150 218 "TANGLED_ARCHITECTURE": spec.Architecture, 151 - // HOME must be writable; we run as user 10000 so default /root won't work 152 - "HOME": "/tmp", 219 + "HOME": "/tmp", 153 220 } 154 221 155 - workflow := &models.Workflow{ 222 + return &models.Workflow{ 156 223 Steps: modelSteps, 157 224 Name: twf.Name, 158 225 Data: workflowData, 159 226 Environment: workflowEnv, 227 + }, nil 228 + } 229 + 230 + // initMultiArchWorkflow creates a Workflow with synthetic steps for matrix execution. 
231 + func (e *KubernetesEngine) initMultiArchWorkflow(name string, raw rawWorkflowSpec, legs []MatrixLeg, cloneStep models.CloneStep) (*models.Workflow, error) { 232 + // Use the first leg's architecture for the spec that gets stored 233 + // (the actual per-leg specs are built in SetupWorkflow) 234 + spec := loomv1alpha1.WorkflowSpec{ 235 + Name: name, 236 + Image: raw.Image[0], 237 + Architecture: raw.Architecture[0], 238 + Steps: raw.Steps, 239 + When: raw.When, 240 + Environment: raw.Environment, 241 + Dependencies: raw.Dependencies, 242 + Final: raw.Final, 160 243 } 161 244 162 - return workflow, nil 245 + // Build synthetic steps that the framework will iterate over 246 + var modelSteps []models.Step 247 + 248 + // Step 0: matrix build phase (fans out to N parallel Jobs) 249 + archList := make([]string, len(legs)) 250 + for i, leg := range legs { 251 + archList[i] = leg.Architecture 252 + } 253 + modelSteps = append(modelSteps, SimpleStep{ 254 + StepName: fmt.Sprintf("Matrix build (%s)", strings.Join(archList, ", ")), 255 + StepCommand: "# internal: matrix fan-out", 256 + StepKind: models.StepKindUser, 257 + }) 258 + 259 + // Step 1: final phase (if final block exists) 260 + if raw.Final != nil { 261 + modelSteps = append(modelSteps, SimpleStep{ 262 + StepName: "Final", 263 + StepCommand: "# internal: final step", 264 + StepKind: models.StepKindUser, 265 + }) 266 + } 267 + 268 + workflowData := &kubernetesWorkflowData{ 269 + Spec: spec, 270 + CloneStep: cloneStep, 271 + IsMultiArch: true, 272 + MatrixLegs: legs, 273 + FinalSpec: raw.Final, 274 + } 275 + 276 + workflowEnv := map[string]string{ 277 + "TANGLED_ARCHITECTURE": raw.Architecture[0], 278 + "HOME": "/tmp", 279 + } 280 + 281 + return &models.Workflow{ 282 + Steps: modelSteps, 283 + Name: name, 284 + Data: workflowData, 285 + Environment: workflowEnv, 286 + }, nil 163 287 } 164 288 165 289 // SetupWorkflow creates a SpindleSet CR for the workflow. 
··· 223 347 } 224 348 225 349 // Build PipelineRunSpec from pre-computed data 226 - // Knot is extracted from the pipeline ID provided by the framework 227 350 skipClone := len(data.CloneStep.Commands()) == 0 351 + var workflows []loomv1alpha1.WorkflowSpec 352 + 353 + if data.IsMultiArch { 354 + // Expand matrix into per-leg WorkflowSpecs 355 + for _, leg := range data.MatrixLegs { 356 + legSpec := loomv1alpha1.WorkflowSpec{ 357 + Name: fmt.Sprintf("%s-%s", data.Spec.Name, leg.Architecture), 358 + Image: leg.Image, 359 + Architecture: leg.Architecture, 360 + Steps: data.Spec.Steps, 361 + When: data.Spec.When, 362 + Environment: maps.Clone(data.Spec.Environment), 363 + Dependencies: data.Spec.Dependencies, 364 + IsMatrixLeg: true, 365 + } 366 + // Override per-leg env vars 367 + if legSpec.Environment == nil { 368 + legSpec.Environment = make(map[string]string) 369 + } 370 + legSpec.Environment["TANGLED_ARCHITECTURE"] = leg.Architecture 371 + legSpec.Environment["TANGLED_IMAGE"] = leg.Image 372 + legSpec.Environment["LOOM_MATRIX_LEG"] = "true" 373 + legSpec.Environment["LOOM_ARTIFACTS"] = "/artifacts" 374 + workflows = append(workflows, legSpec) 375 + } 376 + 377 + // Add final WorkflowSpec if specified 378 + if data.FinalSpec != nil { 379 + finalImage := data.FinalSpec.Image 380 + if finalImage == "" { 381 + finalImage = data.MatrixLegs[0].Image 382 + } 383 + finalSpec := loomv1alpha1.WorkflowSpec{ 384 + Name: fmt.Sprintf("%s-final", data.Spec.Name), 385 + Image: finalImage, 386 + Architecture: data.FinalSpec.Architecture, 387 + Steps: data.FinalSpec.Steps, 388 + Environment: maps.Clone(data.Spec.Environment), 389 + IsFinal: true, 390 + } 391 + if finalSpec.Environment == nil { 392 + finalSpec.Environment = make(map[string]string) 393 + } 394 + finalSpec.Environment["TANGLED_ARCHITECTURE"] = data.FinalSpec.Architecture 395 + finalSpec.Environment["LOOM_FINAL"] = "true" 396 + finalSpec.Environment["LOOM_ARTIFACTS"] = "/artifacts" 397 + workflows = append(workflows, 
finalSpec) 398 + } 399 + 400 + logger.Info("Expanded multi-arch workflow", "legs", len(data.MatrixLegs), "hasFinal", data.FinalSpec != nil) 401 + } else { 402 + singleSpec := data.Spec 403 + if singleSpec.Environment == nil { 404 + singleSpec.Environment = make(map[string]string) 405 + } 406 + singleSpec.Environment["LOOM_ARTIFACTS"] = "/artifacts" 407 + workflows = []loomv1alpha1.WorkflowSpec{singleSpec} 408 + } 409 + 228 410 pipelineRun := &loomv1alpha1.PipelineRunSpec{ 229 411 PipelineID: wid.Rkey, 230 412 SkipClone: skipClone, 231 413 Secrets: repoSecrets, 232 - Workflows: []loomv1alpha1.WorkflowSpec{data.Spec}, 414 + Workflows: workflows, 415 + MultiArch: data.IsMultiArch, 233 416 } 234 417 235 418 // Add clone commands if not skipping ··· 342 525 // Remove from tracking map 343 526 delete(e.spindleSets, wid.String()) 344 527 345 - // Close any open log streams for this workflow 346 - e.closeLogStream(wid) 528 + // Clean up artifacts for this pipeline 529 + if e.artifacts != nil { 530 + if err := e.artifacts.Cleanup(wid.PipelineId.AtUri().String()); err != nil { 531 + logger.Error(err, "Failed to clean up artifacts") 532 + } 533 + } 347 534 348 535 logger.Info("SpindleSet cleaned up successfully") 349 536 return nil 350 537 } 351 538 352 - // RunStep streams logs for the specific step and waits for that step to complete. 353 - // For Kubernetes engine, all steps run in a single Job, but we stream logs incrementally 354 - // as each step executes. Each RunStep call blocks until that step's "end" control event is received. 539 + // RunStep waits for step completion events from the runner via the gRPC hub. 540 + // For single-arch workflows, blocks until the step's "end" control event. 
541 + // For multi-arch workflows: 542 + // - idx 0: waits for ALL matrix leg runners to complete all their steps 543 + // - idx 1: waits for the final runner to complete all its steps 355 544 func (e *KubernetesEngine) RunStep(ctx context.Context, wid models.WorkflowId, w *models.Workflow, idx int, wfSecrets []secrets.UnlockedSecret, wfLogger models.WorkflowLogger) error { 356 545 logger := log.FromContext(ctx).WithValues("workflow", wid.Name, "pipeline", wid.Rkey, "step", idx) 357 546 358 - // Query for the Job created by SpindleSetReconciler (only on first step) 359 - var job *batchv1.Job 360 - if idx == 0 { 361 - spindleSet, err := e.getSpindleSet(ctx, wid) 362 - if err != nil { 363 - return err 364 - } 365 - if spindleSet == nil { 366 - return fmt.Errorf("no SpindleSet found for workflow %s", wid.String()) 367 - } 368 - 369 - // Wait for Job to be created by controller 370 - deadline := time.Now().Add(5 * time.Minute) 371 - for { 372 - if time.Now().After(deadline) { 373 - return fmt.Errorf("timeout waiting for Job to be created by controller") 374 - } 375 - 376 - jobList := &batchv1.JobList{} 377 - err := e.client.List(ctx, jobList, 378 - client.InNamespace(e.namespace), 379 - client.MatchingLabels{ 380 - "loom.j5t.io/spindleset": spindleSet.Name, 381 - "loom.j5t.io/workflow": w.Name, 382 - "loom.j5t.io/pipeline-id": wid.Rkey, 383 - }) 384 - if err != nil { 385 - return fmt.Errorf("failed to list jobs: %w", err) 386 - } 547 + data, ok := w.Data.(*kubernetesWorkflowData) 548 + if !ok { 549 + return fmt.Errorf("invalid workflow data type") 550 + } 387 551 388 - if len(jobList.Items) > 0 { 389 - job = &jobList.Items[0] 390 - break 391 - } 392 - 393 - time.Sleep(2 * time.Second) 394 - } 395 - 396 - logger.Info("Found Job for workflow", "jobName", job.Name) 552 + if data.IsMultiArch { 553 + return e.runMultiArchStep(ctx, wid, data, idx, wfLogger) 397 554 } 398 555 399 - // Get or create log stream (creates on first step, reuses on subsequent steps) 400 - stream, 
err := e.getOrCreateLogStream(ctx, wid, job) 401 - if err != nil { 402 - return fmt.Errorf("failed to get log stream: %w", err) 556 + // Single-arch: wait for one runner. 557 + // PipelineID must match what the runner sends — the framework injects 558 + // TANGLED_PIPELINE_ID as the full AT URI (see spindle/models.PipelineEnvVars), 559 + // and the runner uses that value when registering with the hub. 560 + key := loomgrpc.RunnerKey{ 561 + PipelineID: wid.PipelineId.AtUri().String(), 562 + WorkflowName: wid.Name, 563 + Architecture: data.Spec.Architecture, 403 564 } 404 565 405 - // Read from stream until this step's end event 406 - if wfLogger != nil { 407 - logger.Info("Reading logs for step", "stepID", idx) 408 - if err := e.readUntilStepEnd(ctx, stream, idx, w, wfLogger); err != nil { 409 - logger.Error(err, "Failed to read step logs") 410 - // Clean up stream on error 411 - e.closeLogStream(wid) 412 - return fmt.Errorf("failed to read logs for step %d: %w", idx, err) 566 + if idx == 0 { 567 + logger.Info("waiting for runner to connect", "key", key.String()) 568 + select { 569 + case <-e.hub.WaitForRunner(key): 570 + logger.Info("runner connected", "key", key.String()) 571 + case <-ctx.Done(): 572 + return fmt.Errorf("context canceled while waiting for runner: %w", ctx.Err()) 573 + case <-time.After(10 * time.Minute): 574 + return fmt.Errorf("timeout waiting for runner to connect") 413 575 } 414 - logger.Info("Step completed", "stepID", idx) 415 576 } 416 577 417 - // Clean up stream after last step 418 - if idx == len(w.Steps)-1 { 419 - logger.Info("Last step completed, closing log stream") 420 - e.closeLogStream(wid) 421 - } 578 + return e.waitForRunnerStep(ctx, key, idx, idx, wfLogger) 579 + } 422 580 423 - return nil 424 - } 581 + // runMultiArchStep handles RunStep for multi-arch workflows. 
582 + func (e *KubernetesEngine) runMultiArchStep(ctx context.Context, wid models.WorkflowId, data *kubernetesWorkflowData, idx int, wfLogger models.WorkflowLogger) error { 583 + logger := log.FromContext(ctx).WithValues("workflow", wid.Name, "pipeline", wid.Rkey, "step", idx) 425 584 426 - // getOrCreateLogStream gets an existing log stream or creates a new one for step 0 427 - func (e *KubernetesEngine) getOrCreateLogStream(ctx context.Context, wid models.WorkflowId, job *batchv1.Job) (*workflowLogStream, error) { 428 - widKey := wid.String() 585 + if idx == 0 { 586 + // Matrix build phase: wait for all leg runners to complete all their steps 587 + logger.Info("waiting for matrix leg runners", "legs", len(data.MatrixLegs)) 429 588 430 - // Check if stream already exists 431 - e.streamMutex.RLock() 432 - stream, exists := e.logStreams[widKey] 433 - e.streamMutex.RUnlock() 589 + type legResult struct { 590 + leg MatrixLeg 591 + err error 592 + } 593 + results := make(chan legResult, len(data.MatrixLegs)) 434 594 435 - if exists { 436 - return stream, nil 437 - } 595 + for legIdx, leg := range data.MatrixLegs { 596 + go func() { 597 + legName := fmt.Sprintf("%s-%s", data.Spec.Name, leg.Architecture) 598 + key := loomgrpc.RunnerKey{ 599 + PipelineID: wid.PipelineId.AtUri().String(), 600 + WorkflowName: legName, 601 + Architecture: leg.Architecture, 602 + } 438 603 439 - // Create new stream 440 - logger := log.FromContext(ctx).WithValues("workflow", wid.Name, "pipeline", wid.Rkey) 604 + // Wait for this leg's runner to connect 605 + select { 606 + case <-e.hub.WaitForRunner(key): 607 + logger.Info("matrix leg runner connected", "key", key.String()) 608 + case <-ctx.Done(): 609 + results <- legResult{leg: leg, err: ctx.Err()} 610 + return 611 + case <-time.After(10 * time.Minute): 612 + results <- legResult{leg: leg, err: fmt.Errorf("timeout waiting for runner %s", key.String())} 613 + return 614 + } 441 615 442 - // Create kubernetes clientset for log streaming 443 - 
clientset, err := kubernetes.NewForConfig(e.config) 444 - if err != nil { 445 - return nil, fmt.Errorf("failed to create kubernetes clientset: %w", err) 446 - } 616 + // Wait for all steps in this leg to complete, emitting per-leg 617 + // control log lines so each architecture's output gets its own 618 + // step section in the rendered log. 619 + for stepIdx, userStep := range data.Spec.Steps { 620 + logStepID := matrixLegLogStepID(legIdx, stepIdx) 621 + sStep := syntheticStep{ 622 + name: fmt.Sprintf("%s (%s)", userStep.Name, leg.Architecture), 623 + command: userStep.Command, 624 + kind: models.StepKindUser, 625 + } 626 + if wfLogger != nil { 627 + _, _ = wfLogger.ControlWriter(logStepID, sStep, models.StepStatusStart).Write([]byte{0}) 628 + } 629 + err := e.waitForRunnerStep(ctx, key, stepIdx, logStepID, wfLogger) 630 + if wfLogger != nil { 631 + _, _ = wfLogger.ControlWriter(logStepID, sStep, models.StepStatusEnd).Write([]byte{0}) 632 + } 633 + if err != nil { 634 + results <- legResult{leg: leg, err: fmt.Errorf("leg %s step %q: %w", leg.Architecture, userStep.Name, err)} 635 + return 636 + } 637 + } 447 638 448 - // Wait for pod to be created 449 - var pod *corev1.Pod 450 - deadline := time.Now().Add(2 * time.Minute) 451 - for { 452 - if time.Now().After(deadline) { 453 - return nil, fmt.Errorf("timeout waiting for pod to be created") 639 + results <- legResult{leg: leg, err: nil} 640 + }() 454 641 } 455 642 456 - pods := &corev1.PodList{} 457 - err := e.client.List(ctx, pods, client.InNamespace(job.Namespace), client.MatchingLabels(job.Spec.Template.Labels)) 458 - if err != nil { 459 - return nil, fmt.Errorf("failed to list pods: %w", err) 643 + // Collect results from all legs 644 + var errs []error 645 + for range data.MatrixLegs { 646 + result := <-results 647 + if result.err != nil { 648 + logger.Error(result.err, "matrix leg failed", "arch", result.leg.Architecture) 649 + errs = append(errs, result.err) 650 + } else { 651 + logger.Info("matrix leg 
completed", "arch", result.leg.Architecture) 652 + } 460 653 } 461 654 462 - if len(pods.Items) > 0 { 463 - pod = &pods.Items[0] 464 - break 655 + if len(errs) > 0 { 656 + return fmt.Errorf("matrix build failed: %v", errs[0]) 465 657 } 466 658 467 - time.Sleep(1 * time.Second) 659 + logger.Info("all matrix legs completed successfully") 660 + return nil 468 661 } 469 662 470 - logger.Info("Found pod for job", "podName", pod.Name) 471 - 472 - // Wait for pod to be running (or completed) 473 - deadline = time.Now().Add(5 * time.Minute) 474 - for { 475 - if time.Now().After(deadline) { 476 - return nil, fmt.Errorf("timeout waiting for pod to start") 663 + if idx == 1 && data.FinalSpec != nil { 664 + // Final phase: wait for the final runner 665 + finalName := fmt.Sprintf("%s-final", data.Spec.Name) 666 + key := loomgrpc.RunnerKey{ 667 + PipelineID: wid.PipelineId.AtUri().String(), 668 + WorkflowName: finalName, 669 + Architecture: data.FinalSpec.Architecture, 477 670 } 478 671 479 - currentPod := &corev1.Pod{} 480 - err := e.client.Get(ctx, client.ObjectKey{ 481 - Namespace: pod.Namespace, 482 - Name: pod.Name, 483 - }, currentPod) 484 - if err != nil { 485 - return nil, fmt.Errorf("failed to get pod: %w", err) 672 + logger.Info("waiting for final runner to connect", "key", key.String()) 673 + select { 674 + case <-e.hub.WaitForRunner(key): 675 + logger.Info("final runner connected", "key", key.String()) 676 + case <-ctx.Done(): 677 + return fmt.Errorf("context canceled while waiting for final runner: %w", ctx.Err()) 678 + case <-time.After(10 * time.Minute): 679 + return fmt.Errorf("timeout waiting for final runner to connect") 486 680 } 487 681 488 - if currentPod.Status.Phase == corev1.PodRunning || currentPod.Status.Phase == corev1.PodSucceeded || currentPod.Status.Phase == corev1.PodFailed { 489 - pod = currentPod 490 - break 682 + // Stream artifacts from matrix legs to the final runner 683 + if e.artifacts != nil { 684 + rs := e.hub.Get(key) 685 + if rs != nil { 
686 + logger.Info("streaming artifacts to final runner", "pipeline", wid.Rkey) 687 + if err := e.artifacts.StreamToRunner(wid.PipelineId.AtUri().String(), rs.SendToRunner); err != nil { 688 + logger.Error(err, "failed to stream artifacts to final runner") 689 + } 690 + } 491 691 } 492 692 493 - time.Sleep(1 * time.Second) 494 - } 693 + // Wait for all final steps, emitting per-step control log lines so the 694 + // final phase's user steps are visible in the rendered log (the 695 + // framework only emits a single "Final" control entry above us). 696 + for stepIdx, finalStep := range data.FinalSpec.Steps { 697 + logStepID := finalLogStepID(len(data.MatrixLegs), stepIdx) 698 + sStep := syntheticStep{ 699 + name: fmt.Sprintf("%s (final)", finalStep.Name), 700 + command: finalStep.Command, 701 + kind: models.StepKindUser, 702 + } 703 + if wfLogger != nil { 704 + _, _ = wfLogger.ControlWriter(logStepID, sStep, models.StepStatusStart).Write([]byte{0}) 705 + } 706 + err := e.waitForRunnerStep(ctx, key, stepIdx, logStepID, wfLogger) 707 + if wfLogger != nil { 708 + _, _ = wfLogger.ControlWriter(logStepID, sStep, models.StepStatusEnd).Write([]byte{0}) 709 + } 710 + if err != nil { 711 + return fmt.Errorf("final step %q: %w", finalStep.Name, err) 712 + } 713 + } 495 714 496 - // Only use Follow mode for running pods. For completed pods, we need to read 497 - // existing logs (Follow:true only streams NEW logs after connection). 
498 - shouldFollow := pod.Status.Phase == corev1.PodRunning 499 - if !shouldFollow { 500 - logger.Info("Pod already completed, reading existing logs", "podName", pod.Name, "phase", pod.Status.Phase) 501 - } else { 502 - logger.Info("Pod is running, streaming logs", "podName", pod.Name, "phase", pod.Status.Phase) 715 + logger.Info("final steps completed") 716 + return nil 503 717 } 504 718 505 - // Stream logs from the main container (not init containers) 506 - req := clientset.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{ 507 - Container: "runner", 508 - Follow: shouldFollow, 509 - }) 510 - 511 - logStream, err := req.Stream(ctx) 512 - if err != nil { 513 - return nil, fmt.Errorf("failed to open log stream: %w", err) 514 - } 515 - 516 - // Create scanner 517 - scanner := bufio.NewScanner(logStream) 518 - buf := make([]byte, 0, 64*1024) 519 - scanner.Buffer(buf, 1024*1024) 520 - 521 - // Create and store stream 522 - stream = &workflowLogStream{ 523 - scanner: scanner, 524 - stream: logStream, 525 - pod: pod, 526 - podPhase: pod.Status.Phase, 527 - } 528 - 529 - e.streamMutex.Lock() 530 - e.logStreams[widKey] = stream 531 - e.streamMutex.Unlock() 532 - 533 - return stream, nil 719 + return fmt.Errorf("unexpected step index %d for multi-arch workflow", idx) 534 720 } 535 721 536 - // closeLogStream closes and removes a log stream 537 - func (e *KubernetesEngine) closeLogStream(wid models.WorkflowId) { 538 - widKey := wid.String() 539 - 540 - e.streamMutex.Lock() 541 - defer e.streamMutex.Unlock() 542 - 543 - if stream, exists := e.logStreams[widKey]; exists { 544 - stream.stream.Close() 545 - delete(e.logStreams, widKey) 722 + // waitForRunnerStep reads events from a runner's gRPC stream until a specific 723 + // step completes. runnerStepID is matched against StepID fields on runner 724 + // events to filter out other steps' events; logStepID is the step id passed to 725 + // wfLogger.DataWriter when forwarding log content. 
For single-arch workflows 726 + // these are the same; for matrix legs they differ so each leg's logs land in 727 + // a distinct UI step. 728 + func (e *KubernetesEngine) waitForRunnerStep(ctx context.Context, key loomgrpc.RunnerKey, runnerStepID, logStepID int, wfLogger models.WorkflowLogger) error { 729 + rs := e.hub.Get(key) 730 + if rs == nil { 731 + return fmt.Errorf("runner not connected for %s", key.String()) 546 732 } 547 - } 548 - 549 - // readUntilStepEnd reads from the log stream until the end event for the specified step 550 - func (e *KubernetesEngine) readUntilStepEnd(ctx context.Context, stream *workflowLogStream, stepID int, workflow *models.Workflow, wfLogger models.WorkflowLogger) error { 551 - scanner := stream.scanner 552 733 553 - for scanner.Scan() { 554 - line := scanner.Text() 555 - 556 - // Try to parse as extendedLogLine from the runner binary (includes exit_code) 557 - var logLine extendedLogLine 558 - if err := json.Unmarshal([]byte(line), &logLine); err != nil { 559 - // Not JSON or parse error - skip 560 - continue 561 - } 562 - 563 - // Validate step index 564 - if logLine.StepId < 0 || logLine.StepId >= len(workflow.Steps) { 565 - continue 566 - } 567 - 568 - // Only process events for the current step 569 - if logLine.StepId != stepID { 570 - // Got event for a different step - this shouldn't happen in sequential execution 571 - // but log it and continue 572 - continue 573 - } 574 - 575 - switch logLine.Kind { 576 - case models.LogKindControl: 577 - // Use control events from runner for flow control only 578 - // Don't write them - the core spindle engine writes control events 579 - if logLine.StepStatus == models.StepStatusEnd { 580 - // Check exit code before returning success 581 - if logLine.ExitCode != 0 { 582 - return fmt.Errorf("step %d failed with exit code %d", stepID, logLine.ExitCode) 734 + for { 735 + select { 736 + case evt := <-rs.Steps: 737 + if evt.StepID != runnerStepID { 738 + continue 739 + } 740 + if evt.Status 
== "end" { 741 + if evt.ExitCode != 0 { 742 + return fmt.Errorf("step %d failed with exit code %d", runnerStepID, evt.ExitCode) 583 743 } 584 744 return nil 585 745 } 586 - // For "start" events, just continue reading 587 746 588 - case models.LogKindData: 589 - // Log output from step 590 - if logLine.Stream == "" { 591 - logLine.Stream = "stdout" // Default to stdout 747 + case logEvt := <-rs.Logs: 748 + if logEvt.StepID != runnerStepID || wfLogger == nil { 749 + continue 592 750 } 593 - dataWriter := wfLogger.DataWriter(logLine.StepId, logLine.Stream) 594 - _, _ = dataWriter.Write([]byte(logLine.Content + "\n")) 595 - } 596 - } 751 + stream := logEvt.Stream 752 + if stream == "" { 753 + stream = "stdout" 754 + } 755 + dataWriter := wfLogger.DataWriter(logStepID, stream) 756 + _, _ = dataWriter.Write([]byte(logEvt.Content + "\n")) 597 757 598 - if err := scanner.Err(); err != nil { 599 - // EOF or context canceled is expected when pod terminates 600 - if err != io.EOF && !strings.Contains(err.Error(), "context canceled") { 601 - return fmt.Errorf("error reading logs: %w", err) 602 - } 603 - } 758 + case <-rs.Done: 759 + return fmt.Errorf("runner disconnected before step %d completed", runnerStepID) 604 760 605 - // Scanner ended without seeing step end event. 606 - // Re-check current pod status - it may have completed since we started streaming. 
607 - currentPod := &corev1.Pod{} 608 - if err := e.client.Get(ctx, client.ObjectKey{Namespace: stream.pod.Namespace, Name: stream.pod.Name}, currentPod); err == nil { 609 - if currentPod.Status.Phase == corev1.PodSucceeded { 610 - // Pod succeeded - treat as success even without control event 611 - return nil 612 - } 613 - if currentPod.Status.Phase == corev1.PodFailed { 614 - return fmt.Errorf("pod failed before step %d completed", stepID) 761 + case <-ctx.Done(): 762 + return fmt.Errorf("context canceled during step %d: %w", runnerStepID, ctx.Err()) 615 763 } 616 764 } 617 - 618 - // Pod status unknown or still running but stream ended unexpectedly 619 - return fmt.Errorf("log stream ended before step %d completed", stepID) 620 765 } 621 766 622 767 // Ensure KubernetesEngine implements the Engine interface
+187
internal/grpc/artifacts.go
··· 1 + package grpc 2 + 3 + import ( 4 + "fmt" 5 + "io" 6 + "os" 7 + "path/filepath" 8 + "strings" 9 + "sync" 10 + 11 + "sigs.k8s.io/controller-runtime/pkg/log" 12 + 13 + pb "tangled.org/evan.jarrett.net/loom/internal/pb/loom/v1" 14 + ) 15 + 16 + const artifactChunkSize = 32 * 1024 // 32KB chunks 17 + 18 + // ArtifactStore manages artifact files on disk, organized by pipeline/architecture. 19 + type ArtifactStore struct { 20 + baseDir string 21 + mu sync.RWMutex 22 + 23 + // Track open file writers for streaming artifact chunks 24 + writers map[string]*os.File 25 + writersMu sync.Mutex 26 + } 27 + 28 + // NewArtifactStore creates a new artifact store at the given base directory. 29 + func NewArtifactStore(baseDir string) (*ArtifactStore, error) { 30 + if err := os.MkdirAll(baseDir, 0755); err != nil { 31 + return nil, fmt.Errorf("failed to create artifact store directory: %w", err) 32 + } 33 + return &ArtifactStore{ 34 + baseDir: baseDir, 35 + writers: make(map[string]*os.File), 36 + }, nil 37 + } 38 + 39 + // artifactDir returns the directory for a pipeline/architecture combination. 40 + func (s *ArtifactStore) artifactDir(pipelineID, architecture string) string { 41 + return filepath.Join(s.baseDir, pipelineID, architecture) 42 + } 43 + 44 + // writerKey creates a unique key for tracking open file writers. 45 + func writerKey(pipelineID, architecture, path string) string { 46 + return fmt.Sprintf("%s/%s/%s", pipelineID, architecture, path) 47 + } 48 + 49 + // WriteChunk writes an artifact chunk to disk. Creates the file on first chunk, 50 + // appends on subsequent chunks, and closes on EOF. 
51 + func (s *ArtifactStore) WriteChunk(pipelineID, architecture string, chunk ArtifactEvent) error { 52 + logger := log.Log.WithName("artifacts") 53 + 54 + dir := s.artifactDir(pipelineID, architecture) 55 + fullPath := filepath.Join(dir, chunk.Path) 56 + 57 + // Security: ensure the path doesn't escape the artifact directory 58 + cleanPath, err := filepath.Rel(dir, fullPath) 59 + if err != nil || strings.HasPrefix(cleanPath, "..") { 60 + return fmt.Errorf("invalid artifact path: %s", chunk.Path) 61 + } 62 + 63 + key := writerKey(pipelineID, architecture, chunk.Path) 64 + 65 + s.writersMu.Lock() 66 + defer s.writersMu.Unlock() 67 + 68 + f, exists := s.writers[key] 69 + if !exists { 70 + // Create directory structure and open file 71 + if err := os.MkdirAll(filepath.Dir(fullPath), 0755); err != nil { 72 + return fmt.Errorf("failed to create artifact directory: %w", err) 73 + } 74 + f, err = os.Create(fullPath) 75 + if err != nil { 76 + return fmt.Errorf("failed to create artifact file: %w", err) 77 + } 78 + s.writers[key] = f 79 + logger.Info("receiving artifact", "pipeline", pipelineID, "arch", architecture, "path", chunk.Path) 80 + } 81 + 82 + if len(chunk.Data) > 0 { 83 + if _, err := f.Write(chunk.Data); err != nil { 84 + return fmt.Errorf("failed to write artifact chunk: %w", err) 85 + } 86 + } 87 + 88 + if chunk.EOF { 89 + f.Close() 90 + delete(s.writers, key) 91 + logger.Info("artifact received", "pipeline", pipelineID, "arch", architecture, "path", chunk.Path) 92 + } 93 + 94 + return nil 95 + } 96 + 97 + // StreamToRunner sends all artifacts for a pipeline to a runner's SendToRunner channel. 98 + // Used by final jobs to receive artifacts from all matrix legs. 
99 + func (s *ArtifactStore) StreamToRunner(pipelineID string, sendCh chan<- *pb.ConnectResponse) error { 100 + logger := log.Log.WithName("artifacts") 101 + 102 + pipelineDir := filepath.Join(s.baseDir, pipelineID) 103 + if _, err := os.Stat(pipelineDir); os.IsNotExist(err) { 104 + logger.Info("no artifacts found for pipeline", "pipeline", pipelineID) 105 + return nil 106 + } 107 + 108 + // Walk each architecture directory 109 + archDirs, err := os.ReadDir(pipelineDir) 110 + if err != nil { 111 + return fmt.Errorf("failed to read pipeline artifacts: %w", err) 112 + } 113 + 114 + for _, archDir := range archDirs { 115 + if !archDir.IsDir() { 116 + continue 117 + } 118 + arch := archDir.Name() 119 + archPath := filepath.Join(pipelineDir, arch) 120 + 121 + err := filepath.Walk(archPath, func(path string, info os.FileInfo, err error) error { 122 + if err != nil || info.IsDir() { 123 + return err 124 + } 125 + 126 + relPath, err := filepath.Rel(archPath, path) 127 + if err != nil { 128 + return err 129 + } 130 + 131 + f, err := os.Open(path) 132 + if err != nil { 133 + return fmt.Errorf("failed to open artifact %s: %w", path, err) 134 + } 135 + defer f.Close() 136 + 137 + buf := make([]byte, artifactChunkSize) 138 + for { 139 + n, readErr := f.Read(buf) 140 + isEOF := readErr == io.EOF 141 + 142 + sendCh <- &pb.ConnectResponse{ 143 + Event: &pb.ConnectResponse_ArtifactData{ 144 + ArtifactData: &pb.ArtifactData{ 145 + SourceArchitecture: arch, 146 + Path: relPath, 147 + Data: buf[:n], 148 + Eof: isEOF, 149 + }, 150 + }, 151 + } 152 + 153 + if isEOF { 154 + break 155 + } 156 + if readErr != nil { 157 + return fmt.Errorf("failed to read artifact %s: %w", path, readErr) 158 + } 159 + } 160 + 161 + logger.Info("streamed artifact to runner", "pipeline", pipelineID, "arch", arch, "path", relPath) 162 + return nil 163 + }) 164 + if err != nil { 165 + return err 166 + } 167 + } 168 + 169 + // Send a sentinel message so the runner knows artifact streaming is complete. 
170 + // An empty Path with Eof=true signals "all artifacts sent". 171 + sendCh <- &pb.ConnectResponse{ 172 + Event: &pb.ConnectResponse_ArtifactData{ 173 + ArtifactData: &pb.ArtifactData{ 174 + Path: "", 175 + Eof: true, 176 + }, 177 + }, 178 + } 179 + 180 + return nil 181 + } 182 + 183 + // Cleanup removes all artifacts for a pipeline. 184 + func (s *ArtifactStore) Cleanup(pipelineID string) error { 185 + dir := filepath.Join(s.baseDir, pipelineID) 186 + return os.RemoveAll(dir) 187 + }
+142
internal/grpc/hub.go
··· 1 + package grpc 2 + 3 + import ( 4 + "fmt" 5 + "sync" 6 + 7 + pb "tangled.org/evan.jarrett.net/loom/internal/pb/loom/v1" 8 + ) 9 + 10 + // RunnerKey uniquely identifies a runner connection. 11 + type RunnerKey struct { 12 + PipelineID string 13 + WorkflowName string 14 + Architecture string 15 + } 16 + 17 + func (k RunnerKey) String() string { 18 + return fmt.Sprintf("%s/%s/%s", k.PipelineID, k.WorkflowName, k.Architecture) 19 + } 20 + 21 + // StepEvent represents a step lifecycle event received from a runner. 22 + type StepEvent struct { 23 + StepID int 24 + Status string // "start" or "end" 25 + ExitCode int 26 + } 27 + 28 + // LogEvent represents a log line received from a runner. 29 + type LogEvent struct { 30 + StepID int 31 + Stream string // "stdout" or "stderr" 32 + Content string 33 + } 34 + 35 + // ArtifactEvent represents an artifact chunk received from a runner. 36 + type ArtifactEvent struct { 37 + Path string 38 + Data []byte 39 + EOF bool 40 + } 41 + 42 + // RunnerStream holds the channels for a single runner connection. 43 + // The gRPC server writes to these channels; the engine reads from them. 44 + type RunnerStream struct { 45 + Steps chan StepEvent 46 + Logs chan LogEvent 47 + 48 + // SendToRunner allows the engine to send messages back to the runner. 49 + // The gRPC server reads from this channel and sends to the runner. 50 + SendToRunner chan *pb.ConnectResponse 51 + 52 + // Done is closed when the runner disconnects. 53 + Done chan struct{} 54 + } 55 + 56 + func newRunnerStream() *RunnerStream { 57 + return &RunnerStream{ 58 + Steps: make(chan StepEvent, 64), 59 + Logs: make(chan LogEvent, 256), 60 + SendToRunner: make(chan *pb.ConnectResponse, 64), 61 + Done: make(chan struct{}), 62 + } 63 + } 64 + 65 + // Hub manages active runner connections and provides channels for the engine 66 + // to consume events from runners. 
67 + type Hub struct { 68 + mu sync.RWMutex 69 + streams map[string]*RunnerStream 70 + 71 + // waiters are channels that get notified when a runner with a given key connects. 72 + waitersMu sync.Mutex 73 + waiters map[string][]chan struct{} 74 + } 75 + 76 + // NewHub creates a new Hub. 77 + func NewHub() *Hub { 78 + return &Hub{ 79 + streams: make(map[string]*RunnerStream), 80 + waiters: make(map[string][]chan struct{}), 81 + } 82 + } 83 + 84 + // Register creates a new RunnerStream for the given key. 85 + // Called by the gRPC server when a runner connects. 86 + func (h *Hub) Register(key RunnerKey) *RunnerStream { 87 + h.mu.Lock() 88 + stream := newRunnerStream() 89 + h.streams[key.String()] = stream 90 + h.mu.Unlock() 91 + 92 + // Notify any waiters 93 + h.waitersMu.Lock() 94 + if waiters, ok := h.waiters[key.String()]; ok { 95 + for _, ch := range waiters { 96 + close(ch) 97 + } 98 + delete(h.waiters, key.String()) 99 + } 100 + h.waitersMu.Unlock() 101 + 102 + return stream 103 + } 104 + 105 + // Unregister removes a runner stream and closes its Done channel. 106 + // Called by the gRPC server when a runner disconnects. 107 + func (h *Hub) Unregister(key RunnerKey) { 108 + h.mu.Lock() 109 + defer h.mu.Unlock() 110 + 111 + if stream, ok := h.streams[key.String()]; ok { 112 + close(stream.Done) 113 + delete(h.streams, key.String()) 114 + } 115 + } 116 + 117 + // Get returns the RunnerStream for the given key, or nil if not connected. 118 + func (h *Hub) Get(key RunnerKey) *RunnerStream { 119 + h.mu.RLock() 120 + defer h.mu.RUnlock() 121 + return h.streams[key.String()] 122 + } 123 + 124 + // WaitForRunner returns a channel that is closed when a runner with the given key connects. 125 + // If the runner is already connected, returns a closed channel immediately. 
126 + func (h *Hub) WaitForRunner(key RunnerKey) <-chan struct{} { 127 + h.mu.RLock() 128 + if _, ok := h.streams[key.String()]; ok { 129 + h.mu.RUnlock() 130 + ch := make(chan struct{}) 131 + close(ch) 132 + return ch 133 + } 134 + h.mu.RUnlock() 135 + 136 + h.waitersMu.Lock() 137 + defer h.waitersMu.Unlock() 138 + 139 + ch := make(chan struct{}) 140 + h.waiters[key.String()] = append(h.waiters[key.String()], ch) 141 + return ch 142 + }
+136
internal/grpc/server.go
··· 1 + package grpc 2 + 3 + import ( 4 + "fmt" 5 + "io" 6 + "net" 7 + 8 + "google.golang.org/grpc" 9 + "sigs.k8s.io/controller-runtime/pkg/log" 10 + 11 + pb "tangled.org/evan.jarrett.net/loom/internal/pb/loom/v1" 12 + ) 13 + 14 + // Server is the gRPC server that accepts runner connections. 15 + type Server struct { 16 + pb.UnimplementedLoomRunnerServiceServer 17 + 18 + hub *Hub 19 + artifacts *ArtifactStore 20 + grpcServer *grpc.Server 21 + } 22 + 23 + // NewServer creates a new gRPC server with the given hub and artifact store. 24 + func NewServer(hub *Hub, artifacts *ArtifactStore) *Server { 25 + s := &Server{ 26 + hub: hub, 27 + artifacts: artifacts, 28 + grpcServer: grpc.NewServer(), 29 + } 30 + pb.RegisterLoomRunnerServiceServer(s.grpcServer, s) 31 + return s 32 + } 33 + 34 + // Serve starts the gRPC server on the given listener. 35 + func (s *Server) Serve(lis net.Listener) error { 36 + return s.grpcServer.Serve(lis) 37 + } 38 + 39 + // GracefulStop gracefully stops the gRPC server. 40 + func (s *Server) GracefulStop() { 41 + s.grpcServer.GracefulStop() 42 + } 43 + 44 + // Connect handles a bidirectional stream from a runner. 
45 + func (s *Server) Connect(stream grpc.BidiStreamingServer[pb.ConnectRequest, pb.ConnectResponse]) error { 46 + logger := log.Log.WithName("grpc") 47 + 48 + // First message must contain identity fields 49 + msg, err := stream.Recv() 50 + if err != nil { 51 + return fmt.Errorf("failed to receive initial message: %w", err) 52 + } 53 + 54 + key := RunnerKey{ 55 + PipelineID: msg.PipelineId, 56 + WorkflowName: msg.WorkflowName, 57 + Architecture: msg.Architecture, 58 + } 59 + 60 + if key.PipelineID == "" || key.WorkflowName == "" || key.Architecture == "" { 61 + return fmt.Errorf("first message must include pipeline_id, workflow_name, and architecture") 62 + } 63 + 64 + logger.Info("runner connected", "key", key.String()) 65 + 66 + // Register this runner 67 + rs := s.hub.Register(key) 68 + defer func() { 69 + s.hub.Unregister(key) 70 + logger.Info("runner disconnected", "key", key.String()) 71 + }() 72 + 73 + // Process the first message's event (if any) 74 + s.processEvent(key, rs, msg) 75 + 76 + // Start goroutine to send responses back to the runner 77 + go func() { 78 + for { 79 + select { 80 + case resp, ok := <-rs.SendToRunner: 81 + if !ok { 82 + return 83 + } 84 + if err := stream.Send(resp); err != nil { 85 + logger.Error(err, "failed to send to runner", "key", key.String()) 86 + return 87 + } 88 + case <-rs.Done: 89 + return 90 + } 91 + } 92 + }() 93 + 94 + // Read events from the runner 95 + for { 96 + msg, err := stream.Recv() 97 + if err == io.EOF { 98 + return nil 99 + } 100 + if err != nil { 101 + return fmt.Errorf("recv error from %s: %w", key.String(), err) 102 + } 103 + s.processEvent(key, rs, msg) 104 + } 105 + } 106 + 107 + // processEvent routes a runner event to the appropriate channel. 
108 + func (s *Server) processEvent(key RunnerKey, rs *RunnerStream, msg *pb.ConnectRequest) { 109 + logger := log.Log.WithName("grpc") 110 + 111 + switch evt := msg.Event.(type) { 112 + case *pb.ConnectRequest_StepControl: 113 + rs.Steps <- StepEvent{ 114 + StepID: int(evt.StepControl.StepId), 115 + Status: evt.StepControl.Status, 116 + ExitCode: int(evt.StepControl.ExitCode), 117 + } 118 + case *pb.ConnectRequest_LogLine: 119 + rs.Logs <- LogEvent{ 120 + StepID: int(evt.LogLine.StepId), 121 + Stream: evt.LogLine.Stream, 122 + Content: evt.LogLine.Content, 123 + } 124 + case *pb.ConnectRequest_ArtifactChunk: 125 + // Persist artifact to disk; final jobs stream it back via StreamToRunner. 126 + if s.artifacts != nil { 127 + if err := s.artifacts.WriteChunk(key.PipelineID, key.Architecture, ArtifactEvent{ 128 + Path: evt.ArtifactChunk.Path, 129 + Data: evt.ArtifactChunk.Data, 130 + EOF: evt.ArtifactChunk.Eof, 131 + }); err != nil { 132 + logger.Error(err, "failed to write artifact chunk", "key", key.String()) 133 + } 134 + } 135 + } 136 + }
+18
internal/jobbuilder/job_template.go
··· 67 67 68 68 // Namespace is the Kubernetes namespace for the Job 69 69 Namespace string 70 + 71 + // OperatorAddr is the gRPC address of the Loom operator for runner communication. 72 + // Injected as LOOM_OPERATOR_ADDR env var into the runner container. 73 + OperatorAddr string 70 74 } 71 75 72 76 // nodeMatchesSelector returns true if at least one node has all the labels in selector. ··· 255 259 corev1.EnvVar{ 256 260 Name: "LOOM_SECRET_KEYS", 257 261 Value: strings.Join(config.SecretKeys, ","), 262 + }, 263 + corev1.EnvVar{ 264 + Name: "LOOM_OPERATOR_ADDR", 265 + Value: config.OperatorAddr, 258 266 }, 259 267 ), 260 268 ··· 510 518 MountPath: "/home/runner", 511 519 SubPath: "runner", 512 520 }, 521 + { 522 + Name: "artifacts", 523 + MountPath: "/artifacts", 524 + }, 513 525 } 514 526 515 527 // Mount registry credentials if specified ··· 559 571 }, 560 572 { 561 573 Name: "home-override", 574 + VolumeSource: corev1.VolumeSource{ 575 + EmptyDir: &corev1.EmptyDirVolumeSource{}, 576 + }, 577 + }, 578 + { 579 + Name: "artifacts", 562 580 VolumeSource: corev1.VolumeSource{ 563 581 EmptyDir: &corev1.EmptyDirVolumeSource{}, 564 582 },
+635
internal/pb/loom/v1/loom.pb.go
··· 1 + // Code generated by protoc-gen-go. DO NOT EDIT. 2 + // versions: 3 + // protoc-gen-go v1.36.8 4 + // protoc (unknown) 5 + // source: loom/v1/loom.proto 6 + 7 + package loomv1 8 + 9 + import ( 10 + protoreflect "google.golang.org/protobuf/reflect/protoreflect" 11 + protoimpl "google.golang.org/protobuf/runtime/protoimpl" 12 + reflect "reflect" 13 + sync "sync" 14 + unsafe "unsafe" 15 + ) 16 + 17 + const ( 18 + // Verify that this generated code is sufficiently up-to-date. 19 + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) 20 + // Verify that runtime/protoimpl is sufficiently up-to-date. 21 + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) 22 + ) 23 + 24 + // ConnectRequest is sent from the runner to the operator. 25 + type ConnectRequest struct { 26 + state protoimpl.MessageState `protogen:"open.v1"` 27 + // Identity fields — must be set on the first message, optional on subsequent. 28 + PipelineId string `protobuf:"bytes,1,opt,name=pipeline_id,json=pipelineId,proto3" json:"pipeline_id,omitempty"` 29 + WorkflowName string `protobuf:"bytes,2,opt,name=workflow_name,json=workflowName,proto3" json:"workflow_name,omitempty"` 30 + Architecture string `protobuf:"bytes,3,opt,name=architecture,proto3" json:"architecture,omitempty"` 31 + // Types that are valid to be assigned to Event: 32 + // 33 + // *ConnectRequest_StepControl 34 + // *ConnectRequest_LogLine 35 + // *ConnectRequest_ArtifactChunk 36 + Event isConnectRequest_Event `protobuf_oneof:"event"` 37 + unknownFields protoimpl.UnknownFields 38 + sizeCache protoimpl.SizeCache 39 + } 40 + 41 + func (x *ConnectRequest) Reset() { 42 + *x = ConnectRequest{} 43 + mi := &file_loom_v1_loom_proto_msgTypes[0] 44 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 45 + ms.StoreMessageInfo(mi) 46 + } 47 + 48 + func (x *ConnectRequest) String() string { 49 + return protoimpl.X.MessageStringOf(x) 50 + } 51 + 52 + func (*ConnectRequest) ProtoMessage() {} 53 + 54 + func (x *ConnectRequest) 
ProtoReflect() protoreflect.Message { 55 + mi := &file_loom_v1_loom_proto_msgTypes[0] 56 + if x != nil { 57 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 58 + if ms.LoadMessageInfo() == nil { 59 + ms.StoreMessageInfo(mi) 60 + } 61 + return ms 62 + } 63 + return mi.MessageOf(x) 64 + } 65 + 66 + // Deprecated: Use ConnectRequest.ProtoReflect.Descriptor instead. 67 + func (*ConnectRequest) Descriptor() ([]byte, []int) { 68 + return file_loom_v1_loom_proto_rawDescGZIP(), []int{0} 69 + } 70 + 71 + func (x *ConnectRequest) GetPipelineId() string { 72 + if x != nil { 73 + return x.PipelineId 74 + } 75 + return "" 76 + } 77 + 78 + func (x *ConnectRequest) GetWorkflowName() string { 79 + if x != nil { 80 + return x.WorkflowName 81 + } 82 + return "" 83 + } 84 + 85 + func (x *ConnectRequest) GetArchitecture() string { 86 + if x != nil { 87 + return x.Architecture 88 + } 89 + return "" 90 + } 91 + 92 + func (x *ConnectRequest) GetEvent() isConnectRequest_Event { 93 + if x != nil { 94 + return x.Event 95 + } 96 + return nil 97 + } 98 + 99 + func (x *ConnectRequest) GetStepControl() *StepControl { 100 + if x != nil { 101 + if x, ok := x.Event.(*ConnectRequest_StepControl); ok { 102 + return x.StepControl 103 + } 104 + } 105 + return nil 106 + } 107 + 108 + func (x *ConnectRequest) GetLogLine() *LogLine { 109 + if x != nil { 110 + if x, ok := x.Event.(*ConnectRequest_LogLine); ok { 111 + return x.LogLine 112 + } 113 + } 114 + return nil 115 + } 116 + 117 + func (x *ConnectRequest) GetArtifactChunk() *ArtifactChunk { 118 + if x != nil { 119 + if x, ok := x.Event.(*ConnectRequest_ArtifactChunk); ok { 120 + return x.ArtifactChunk 121 + } 122 + } 123 + return nil 124 + } 125 + 126 + type isConnectRequest_Event interface { 127 + isConnectRequest_Event() 128 + } 129 + 130 + type ConnectRequest_StepControl struct { 131 + StepControl *StepControl `protobuf:"bytes,4,opt,name=step_control,json=stepControl,proto3,oneof"` 132 + } 133 + 134 + type ConnectRequest_LogLine struct { 
135 + LogLine *LogLine `protobuf:"bytes,5,opt,name=log_line,json=logLine,proto3,oneof"` 136 + } 137 + 138 + type ConnectRequest_ArtifactChunk struct { 139 + ArtifactChunk *ArtifactChunk `protobuf:"bytes,6,opt,name=artifact_chunk,json=artifactChunk,proto3,oneof"` 140 + } 141 + 142 + func (*ConnectRequest_StepControl) isConnectRequest_Event() {} 143 + 144 + func (*ConnectRequest_LogLine) isConnectRequest_Event() {} 145 + 146 + func (*ConnectRequest_ArtifactChunk) isConnectRequest_Event() {} 147 + 148 + // ConnectResponse is sent from the operator to the runner. 149 + type ConnectResponse struct { 150 + state protoimpl.MessageState `protogen:"open.v1"` 151 + // Types that are valid to be assigned to Event: 152 + // 153 + // *ConnectResponse_Ack 154 + // *ConnectResponse_ArtifactData 155 + Event isConnectResponse_Event `protobuf_oneof:"event"` 156 + unknownFields protoimpl.UnknownFields 157 + sizeCache protoimpl.SizeCache 158 + } 159 + 160 + func (x *ConnectResponse) Reset() { 161 + *x = ConnectResponse{} 162 + mi := &file_loom_v1_loom_proto_msgTypes[1] 163 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 164 + ms.StoreMessageInfo(mi) 165 + } 166 + 167 + func (x *ConnectResponse) String() string { 168 + return protoimpl.X.MessageStringOf(x) 169 + } 170 + 171 + func (*ConnectResponse) ProtoMessage() {} 172 + 173 + func (x *ConnectResponse) ProtoReflect() protoreflect.Message { 174 + mi := &file_loom_v1_loom_proto_msgTypes[1] 175 + if x != nil { 176 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 177 + if ms.LoadMessageInfo() == nil { 178 + ms.StoreMessageInfo(mi) 179 + } 180 + return ms 181 + } 182 + return mi.MessageOf(x) 183 + } 184 + 185 + // Deprecated: Use ConnectResponse.ProtoReflect.Descriptor instead. 
186 + func (*ConnectResponse) Descriptor() ([]byte, []int) { 187 + return file_loom_v1_loom_proto_rawDescGZIP(), []int{1} 188 + } 189 + 190 + func (x *ConnectResponse) GetEvent() isConnectResponse_Event { 191 + if x != nil { 192 + return x.Event 193 + } 194 + return nil 195 + } 196 + 197 + func (x *ConnectResponse) GetAck() *Ack { 198 + if x != nil { 199 + if x, ok := x.Event.(*ConnectResponse_Ack); ok { 200 + return x.Ack 201 + } 202 + } 203 + return nil 204 + } 205 + 206 + func (x *ConnectResponse) GetArtifactData() *ArtifactData { 207 + if x != nil { 208 + if x, ok := x.Event.(*ConnectResponse_ArtifactData); ok { 209 + return x.ArtifactData 210 + } 211 + } 212 + return nil 213 + } 214 + 215 + type isConnectResponse_Event interface { 216 + isConnectResponse_Event() 217 + } 218 + 219 + type ConnectResponse_Ack struct { 220 + Ack *Ack `protobuf:"bytes,1,opt,name=ack,proto3,oneof"` 221 + } 222 + 223 + type ConnectResponse_ArtifactData struct { 224 + ArtifactData *ArtifactData `protobuf:"bytes,2,opt,name=artifact_data,json=artifactData,proto3,oneof"` 225 + } 226 + 227 + func (*ConnectResponse_Ack) isConnectResponse_Event() {} 228 + 229 + func (*ConnectResponse_ArtifactData) isConnectResponse_Event() {} 230 + 231 + // StepControl signals step lifecycle transitions. 232 + type StepControl struct { 233 + state protoimpl.MessageState `protogen:"open.v1"` 234 + StepId int32 `protobuf:"varint,1,opt,name=step_id,json=stepId,proto3" json:"step_id,omitempty"` 235 + // "start" or "end" 236 + Status string `protobuf:"bytes,2,opt,name=status,proto3" json:"status,omitempty"` 237 + // Exit code of the step command (only meaningful when status = "end"). 
238 + ExitCode int32 `protobuf:"varint,3,opt,name=exit_code,json=exitCode,proto3" json:"exit_code,omitempty"` 239 + unknownFields protoimpl.UnknownFields 240 + sizeCache protoimpl.SizeCache 241 + } 242 + 243 + func (x *StepControl) Reset() { 244 + *x = StepControl{} 245 + mi := &file_loom_v1_loom_proto_msgTypes[2] 246 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 247 + ms.StoreMessageInfo(mi) 248 + } 249 + 250 + func (x *StepControl) String() string { 251 + return protoimpl.X.MessageStringOf(x) 252 + } 253 + 254 + func (*StepControl) ProtoMessage() {} 255 + 256 + func (x *StepControl) ProtoReflect() protoreflect.Message { 257 + mi := &file_loom_v1_loom_proto_msgTypes[2] 258 + if x != nil { 259 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 260 + if ms.LoadMessageInfo() == nil { 261 + ms.StoreMessageInfo(mi) 262 + } 263 + return ms 264 + } 265 + return mi.MessageOf(x) 266 + } 267 + 268 + // Deprecated: Use StepControl.ProtoReflect.Descriptor instead. 269 + func (*StepControl) Descriptor() ([]byte, []int) { 270 + return file_loom_v1_loom_proto_rawDescGZIP(), []int{2} 271 + } 272 + 273 + func (x *StepControl) GetStepId() int32 { 274 + if x != nil { 275 + return x.StepId 276 + } 277 + return 0 278 + } 279 + 280 + func (x *StepControl) GetStatus() string { 281 + if x != nil { 282 + return x.Status 283 + } 284 + return "" 285 + } 286 + 287 + func (x *StepControl) GetExitCode() int32 { 288 + if x != nil { 289 + return x.ExitCode 290 + } 291 + return 0 292 + } 293 + 294 + // LogLine carries a single line of step output. 
295 + type LogLine struct { 296 + state protoimpl.MessageState `protogen:"open.v1"` 297 + StepId int32 `protobuf:"varint,1,opt,name=step_id,json=stepId,proto3" json:"step_id,omitempty"` 298 + // "stdout" or "stderr" 299 + Stream string `protobuf:"bytes,2,opt,name=stream,proto3" json:"stream,omitempty"` 300 + Content string `protobuf:"bytes,3,opt,name=content,proto3" json:"content,omitempty"` 301 + unknownFields protoimpl.UnknownFields 302 + sizeCache protoimpl.SizeCache 303 + } 304 + 305 + func (x *LogLine) Reset() { 306 + *x = LogLine{} 307 + mi := &file_loom_v1_loom_proto_msgTypes[3] 308 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 309 + ms.StoreMessageInfo(mi) 310 + } 311 + 312 + func (x *LogLine) String() string { 313 + return protoimpl.X.MessageStringOf(x) 314 + } 315 + 316 + func (*LogLine) ProtoMessage() {} 317 + 318 + func (x *LogLine) ProtoReflect() protoreflect.Message { 319 + mi := &file_loom_v1_loom_proto_msgTypes[3] 320 + if x != nil { 321 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 322 + if ms.LoadMessageInfo() == nil { 323 + ms.StoreMessageInfo(mi) 324 + } 325 + return ms 326 + } 327 + return mi.MessageOf(x) 328 + } 329 + 330 + // Deprecated: Use LogLine.ProtoReflect.Descriptor instead. 331 + func (*LogLine) Descriptor() ([]byte, []int) { 332 + return file_loom_v1_loom_proto_rawDescGZIP(), []int{3} 333 + } 334 + 335 + func (x *LogLine) GetStepId() int32 { 336 + if x != nil { 337 + return x.StepId 338 + } 339 + return 0 340 + } 341 + 342 + func (x *LogLine) GetStream() string { 343 + if x != nil { 344 + return x.Stream 345 + } 346 + return "" 347 + } 348 + 349 + func (x *LogLine) GetContent() string { 350 + if x != nil { 351 + return x.Content 352 + } 353 + return "" 354 + } 355 + 356 + // ArtifactChunk streams a file from runner to operator in chunks. 357 + // The runner sends one or more chunks per file, with eof=true on the last chunk. 
358 + type ArtifactChunk struct { 359 + state protoimpl.MessageState `protogen:"open.v1"` 360 + // Relative path within the artifacts directory (e.g., "bin/myapp"). 361 + Path string `protobuf:"bytes,1,opt,name=path,proto3" json:"path,omitempty"` 362 + Data []byte `protobuf:"bytes,2,opt,name=data,proto3" json:"data,omitempty"` 363 + Eof bool `protobuf:"varint,3,opt,name=eof,proto3" json:"eof,omitempty"` 364 + unknownFields protoimpl.UnknownFields 365 + sizeCache protoimpl.SizeCache 366 + } 367 + 368 + func (x *ArtifactChunk) Reset() { 369 + *x = ArtifactChunk{} 370 + mi := &file_loom_v1_loom_proto_msgTypes[4] 371 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 372 + ms.StoreMessageInfo(mi) 373 + } 374 + 375 + func (x *ArtifactChunk) String() string { 376 + return protoimpl.X.MessageStringOf(x) 377 + } 378 + 379 + func (*ArtifactChunk) ProtoMessage() {} 380 + 381 + func (x *ArtifactChunk) ProtoReflect() protoreflect.Message { 382 + mi := &file_loom_v1_loom_proto_msgTypes[4] 383 + if x != nil { 384 + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 385 + if ms.LoadMessageInfo() == nil { 386 + ms.StoreMessageInfo(mi) 387 + } 388 + return ms 389 + } 390 + return mi.MessageOf(x) 391 + } 392 + 393 + // Deprecated: Use ArtifactChunk.ProtoReflect.Descriptor instead. 394 + func (*ArtifactChunk) Descriptor() ([]byte, []int) { 395 + return file_loom_v1_loom_proto_rawDescGZIP(), []int{4} 396 + } 397 + 398 + func (x *ArtifactChunk) GetPath() string { 399 + if x != nil { 400 + return x.Path 401 + } 402 + return "" 403 + } 404 + 405 + func (x *ArtifactChunk) GetData() []byte { 406 + if x != nil { 407 + return x.Data 408 + } 409 + return nil 410 + } 411 + 412 + func (x *ArtifactChunk) GetEof() bool { 413 + if x != nil { 414 + return x.Eof 415 + } 416 + return false 417 + } 418 + 419 + // ArtifactData streams artifact files from operator to runner (for final jobs). 420 + // The operator sends collected artifacts from matrix legs to the final runner. 
type ArtifactData struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Architecture of the source matrix leg (e.g., "amd64").
	SourceArchitecture string `protobuf:"bytes,1,opt,name=source_architecture,json=sourceArchitecture,proto3" json:"source_architecture,omitempty"`
	// Relative path within the artifacts directory.
	Path          string `protobuf:"bytes,2,opt,name=path,proto3" json:"path,omitempty"`
	Data          []byte `protobuf:"bytes,3,opt,name=data,proto3" json:"data,omitempty"`
	Eof           bool   `protobuf:"varint,4,opt,name=eof,proto3" json:"eof,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset restores x to its zero value and re-binds it to its message type info.
func (x *ArtifactData) Reset() {
	*x = ArtifactData{}
	mi := &file_loom_v1_loom_proto_msgTypes[5]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *ArtifactData) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*ArtifactData) ProtoMessage() {}

// ProtoReflect returns the reflection interface for this message, lazily
// binding the message type info on first use.
func (x *ArtifactData) ProtoReflect() protoreflect.Message {
	mi := &file_loom_v1_loom_proto_msgTypes[5]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use ArtifactData.ProtoReflect.Descriptor instead.
func (*ArtifactData) Descriptor() ([]byte, []int) {
	return file_loom_v1_loom_proto_rawDescGZIP(), []int{5}
}

func (x *ArtifactData) GetSourceArchitecture() string {
	if x != nil {
		return x.SourceArchitecture
	}
	return ""
}

func (x *ArtifactData) GetPath() string {
	if x != nil {
		return x.Path
	}
	return ""
}

func (x *ArtifactData) GetData() []byte {
	if x != nil {
		return x.Data
	}
	return nil
}

func (x *ArtifactData) GetEof() bool {
	if x != nil {
		return x.Eof
	}
	return false
}

// Ack acknowledges receipt of a runner event. It carries no payload; the
// message itself is the acknowledgement.
type Ack struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

func (x *Ack) Reset() {
	*x = Ack{}
	mi := &file_loom_v1_loom_proto_msgTypes[6]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Ack) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Ack) ProtoMessage() {}

func (x *Ack) ProtoReflect() protoreflect.Message {
	mi := &file_loom_v1_loom_proto_msgTypes[6]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Ack.ProtoReflect.Descriptor instead.
func (*Ack) Descriptor() ([]byte, []int) {
	return file_loom_v1_loom_proto_rawDescGZIP(), []int{6}
}

var File_loom_v1_loom_proto protoreflect.FileDescriptor

// file_loom_v1_loom_proto_rawDesc is the wire-encoded FileDescriptorProto for
// loom/v1/loom.proto. The bytes must match the source .proto exactly; do not
// hand-edit — regenerate with protoc instead.
const file_loom_v1_loom_proto_rawDesc = "" +
	"\n" +
	"\x12loom/v1/loom.proto\x12\aloom.v1\"\xae\x02\n" +
	"\x0eConnectRequest\x12\x1f\n" +
	"\vpipeline_id\x18\x01 \x01(\tR\n" +
	"pipelineId\x12#\n" +
	"\rworkflow_name\x18\x02 \x01(\tR\fworkflowName\x12\"\n" +
	"\farchitecture\x18\x03 \x01(\tR\farchitecture\x129\n" +
	"\fstep_control\x18\x04 \x01(\v2\x14.loom.v1.StepControlH\x00R\vstepControl\x12-\n" +
	"\blog_line\x18\x05 \x01(\v2\x10.loom.v1.LogLineH\x00R\alogLine\x12?\n" +
	"\x0eartifact_chunk\x18\x06 \x01(\v2\x16.loom.v1.ArtifactChunkH\x00R\rartifactChunkB\a\n" +
	"\x05event\"z\n" +
	"\x0fConnectResponse\x12 \n" +
	"\x03ack\x18\x01 \x01(\v2\f.loom.v1.AckH\x00R\x03ack\x12<\n" +
	"\rartifact_data\x18\x02 \x01(\v2\x15.loom.v1.ArtifactDataH\x00R\fartifactDataB\a\n" +
	"\x05event\"[\n" +
	"\vStepControl\x12\x17\n" +
	"\astep_id\x18\x01 \x01(\x05R\x06stepId\x12\x16\n" +
	"\x06status\x18\x02 \x01(\tR\x06status\x12\x1b\n" +
	"\texit_code\x18\x03 \x01(\x05R\bexitCode\"T\n" +
	"\aLogLine\x12\x17\n" +
	"\astep_id\x18\x01 \x01(\x05R\x06stepId\x12\x16\n" +
	"\x06stream\x18\x02 \x01(\tR\x06stream\x12\x18\n" +
	"\acontent\x18\x03 \x01(\tR\acontent\"I\n" +
	"\rArtifactChunk\x12\x12\n" +
	"\x04path\x18\x01 \x01(\tR\x04path\x12\x12\n" +
	"\x04data\x18\x02 \x01(\fR\x04data\x12\x10\n" +
	"\x03eof\x18\x03 \x01(\bR\x03eof\"y\n" +
	"\fArtifactData\x12/\n" +
	"\x13source_architecture\x18\x01 \x01(\tR\x12sourceArchitecture\x12\x12\n" +
	"\x04path\x18\x02 \x01(\tR\x04path\x12\x12\n" +
	"\x04data\x18\x03 \x01(\fR\x04data\x12\x10\n" +
	"\x03eof\x18\x04 \x01(\bR\x03eof\"\x05\n" +
	"\x03Ack2U\n" +
	"\x11LoomRunnerService\x12@\n" +
	"\aConnect\x12\x17.loom.v1.ConnectRequest\x1a\x18.loom.v1.ConnectResponse(\x010\x01B6Z4tangled.org/evan.jarrett.net/loom/internal/pb/loomv1b\x06proto3"

var (
	file_loom_v1_loom_proto_rawDescOnce sync.Once
	file_loom_v1_loom_proto_rawDescData []byte
)

// file_loom_v1_loom_proto_rawDescGZIP lazily gzip-compresses the raw
// descriptor exactly once and caches the result for Descriptor() calls.
func file_loom_v1_loom_proto_rawDescGZIP() []byte {
	file_loom_v1_loom_proto_rawDescOnce.Do(func() {
		file_loom_v1_loom_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_loom_v1_loom_proto_rawDesc), len(file_loom_v1_loom_proto_rawDesc)))
	})
	return file_loom_v1_loom_proto_rawDescData
}

var file_loom_v1_loom_proto_msgTypes = make([]protoimpl.MessageInfo, 7)
var file_loom_v1_loom_proto_goTypes = []any{
	(*ConnectRequest)(nil),  // 0: loom.v1.ConnectRequest
	(*ConnectResponse)(nil), // 1: loom.v1.ConnectResponse
	(*StepControl)(nil),     // 2: loom.v1.StepControl
	(*LogLine)(nil),         // 3: loom.v1.LogLine
	(*ArtifactChunk)(nil),   // 4: loom.v1.ArtifactChunk
	(*ArtifactData)(nil),    // 5: loom.v1.ArtifactData
	(*Ack)(nil),             // 6: loom.v1.Ack
}

// file_loom_v1_loom_proto_depIdxs indexes into goTypes above; the trailing
// entries delimit sub-lists (field type_names, method inputs/outputs, etc.).
// Ordering is significant — do not edit by hand.
var file_loom_v1_loom_proto_depIdxs = []int32{
	2, // 0: loom.v1.ConnectRequest.step_control:type_name -> loom.v1.StepControl
	3, // 1: loom.v1.ConnectRequest.log_line:type_name -> loom.v1.LogLine
	4, // 2: loom.v1.ConnectRequest.artifact_chunk:type_name -> loom.v1.ArtifactChunk
	6, // 3: loom.v1.ConnectResponse.ack:type_name -> loom.v1.Ack
	5, // 4: loom.v1.ConnectResponse.artifact_data:type_name -> loom.v1.ArtifactData
	0, // 5: loom.v1.LoomRunnerService.Connect:input_type -> loom.v1.ConnectRequest
	1, // 6: loom.v1.LoomRunnerService.Connect:output_type -> loom.v1.ConnectResponse
	6, // [6:7] is the sub-list for method output_type
	5, // [5:6] is the sub-list for method input_type
	5, // [5:5] is the sub-list for extension type_name
	5, // [5:5] is the sub-list for extension extendee
	0, // [0:5] is the sub-list for field type_name
}

func init() { file_loom_v1_loom_proto_init() }
func file_loom_v1_loom_proto_init() {
	if File_loom_v1_loom_proto != nil {
		return
	}
	// Register the Go wrapper types for each oneof so the runtime can map
	// oneof cases to their concrete representations.
	file_loom_v1_loom_proto_msgTypes[0].OneofWrappers = []any{
		(*ConnectRequest_StepControl)(nil),
		(*ConnectRequest_LogLine)(nil),
		(*ConnectRequest_ArtifactChunk)(nil),
	}
	file_loom_v1_loom_proto_msgTypes[1].OneofWrappers = []any{
		(*ConnectResponse_Ack)(nil),
		(*ConnectResponse_ArtifactData)(nil),
	}
	type x struct{}
	out := protoimpl.TypeBuilder{
		File: protoimpl.DescBuilder{
			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
			RawDescriptor: unsafe.Slice(unsafe.StringData(file_loom_v1_loom_proto_rawDesc), len(file_loom_v1_loom_proto_rawDesc)),
			NumEnums:      0,
			NumMessages:   7,
			NumExtensions: 0,
			NumServices:   1,
		},
		GoTypes:           file_loom_v1_loom_proto_goTypes,
		DependencyIndexes: file_loom_v1_loom_proto_depIdxs,
		MessageInfos:      file_loom_v1_loom_proto_msgTypes,
	}.Build()
	File_loom_v1_loom_proto = out.File
	// Release builder inputs so they can be garbage collected after init.
	file_loom_v1_loom_proto_goTypes = nil
	file_loom_v1_loom_proto_depIdxs = nil
}
+129
internal/pb/loom/v1/loom_grpc.pb.go
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.5.1
// - protoc (unknown)
// source: loom/v1/loom.proto

package loomv1

import (
	context "context"
	grpc "google.golang.org/grpc"
	codes "google.golang.org/grpc/codes"
	status "google.golang.org/grpc/status"
)

// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9

const (
	LoomRunnerService_Connect_FullMethodName = "/loom.v1.LoomRunnerService/Connect"
)

// LoomRunnerServiceClient is the client API for LoomRunnerService service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
//
// LoomRunnerService is the bidirectional communication channel between runner pods
// and the Loom operator. Runners connect on startup and stream all events
// (step control, log output, artifacts) over this single connection.
type LoomRunnerServiceClient interface {
	// Connect establishes a bidirectional stream between a runner and the operator.
	// The runner identifies itself in the first message and then streams events.
	// The operator sends acknowledgements and commands back.
	Connect(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[ConnectRequest, ConnectResponse], error)
}

type loomRunnerServiceClient struct {
	cc grpc.ClientConnInterface
}

func NewLoomRunnerServiceClient(cc grpc.ClientConnInterface) LoomRunnerServiceClient {
	return &loomRunnerServiceClient{cc}
}

func (c *loomRunnerServiceClient) Connect(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[ConnectRequest, ConnectResponse], error) {
	cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	stream, err := c.cc.NewStream(ctx, &LoomRunnerService_ServiceDesc.Streams[0], LoomRunnerService_Connect_FullMethodName, cOpts...)
	if err != nil {
		return nil, err
	}
	x := &grpc.GenericClientStream[ConnectRequest, ConnectResponse]{ClientStream: stream}
	return x, nil
}

// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type LoomRunnerService_ConnectClient = grpc.BidiStreamingClient[ConnectRequest, ConnectResponse]

// LoomRunnerServiceServer is the server API for LoomRunnerService service.
// All implementations must embed UnimplementedLoomRunnerServiceServer
// for forward compatibility.
//
// LoomRunnerService is the bidirectional communication channel between runner pods
// and the Loom operator. Runners connect on startup and stream all events
// (step control, log output, artifacts) over this single connection.
type LoomRunnerServiceServer interface {
	// Connect establishes a bidirectional stream between a runner and the operator.
	// The runner identifies itself in the first message and then streams events.
	// The operator sends acknowledgements and commands back.
	Connect(grpc.BidiStreamingServer[ConnectRequest, ConnectResponse]) error
	mustEmbedUnimplementedLoomRunnerServiceServer()
}

// UnimplementedLoomRunnerServiceServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedLoomRunnerServiceServer struct{}

func (UnimplementedLoomRunnerServiceServer) Connect(grpc.BidiStreamingServer[ConnectRequest, ConnectResponse]) error {
	return status.Errorf(codes.Unimplemented, "method Connect not implemented")
}
func (UnimplementedLoomRunnerServiceServer) mustEmbedUnimplementedLoomRunnerServiceServer() {}
func (UnimplementedLoomRunnerServiceServer) testEmbeddedByValue()                           {}

// UnsafeLoomRunnerServiceServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to LoomRunnerServiceServer will
// result in compilation errors.
type UnsafeLoomRunnerServiceServer interface {
	mustEmbedUnimplementedLoomRunnerServiceServer()
}

func RegisterLoomRunnerServiceServer(s grpc.ServiceRegistrar, srv LoomRunnerServiceServer) {
	// If the following call panics, it indicates UnimplementedLoomRunnerServiceServer was
	// embedded by pointer and is nil. This will cause panics if an
	// unimplemented method is ever invoked, so we test this at initialization
	// time to prevent it from happening at runtime later due to I/O.
	if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
		t.testEmbeddedByValue()
	}
	s.RegisterService(&LoomRunnerService_ServiceDesc, srv)
}

func _LoomRunnerService_Connect_Handler(srv interface{}, stream grpc.ServerStream) error {
	return srv.(LoomRunnerServiceServer).Connect(&grpc.GenericServerStream[ConnectRequest, ConnectResponse]{ServerStream: stream})
}

// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type LoomRunnerService_ConnectServer = grpc.BidiStreamingServer[ConnectRequest, ConnectResponse]

// LoomRunnerService_ServiceDesc is the grpc.ServiceDesc for LoomRunnerService service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var LoomRunnerService_ServiceDesc = grpc.ServiceDesc{
	ServiceName: "loom.v1.LoomRunnerService",
	HandlerType: (*LoomRunnerServiceServer)(nil),
	Methods:     []grpc.MethodDesc{},
	Streams: []grpc.StreamDesc{
		{
			StreamName: "Connect",
			Handler:    _LoomRunnerService_Connect_Handler,
			// Both directions stream: this is a bidirectional RPC.
			ServerStreams: true,
			ClientStreams: true,
		},
	},
	Metadata: "loom/v1/loom.proto",
}
+77
proto/loom/v1/loom.proto
syntax = "proto3";

package loom.v1;

option go_package = "tangled.org/evan.jarrett.net/loom/internal/pb/loomv1";

// LoomRunnerService is the bidirectional communication channel between runner pods
// and the Loom operator. Runners connect on startup and stream all events
// (step control, log output, artifacts) over this single connection.
service LoomRunnerService {
  // Connect establishes a bidirectional stream between a runner and the operator.
  // The runner identifies itself in the first message and then streams events.
  // The operator sends acknowledgements and commands back.
  rpc Connect(stream ConnectRequest) returns (stream ConnectResponse);
}

// ConnectRequest is sent from the runner to the operator.
message ConnectRequest {
  // Identity fields — must be set on the first message of a stream so the
  // operator can associate the connection; optional on subsequent messages.
  string pipeline_id = 1;
  string workflow_name = 2;
  string architecture = 3;

  // At most one event per message; a bare identity-only message carries none.
  oneof event {
    StepControl step_control = 4;
    LogLine log_line = 5;
    ArtifactChunk artifact_chunk = 6;
  }
}

// ConnectResponse is sent from the operator to the runner.
message ConnectResponse {
  oneof event {
    Ack ack = 1;
    ArtifactData artifact_data = 2;
  }
}

// StepControl signals step lifecycle transitions.
message StepControl {
  int32 step_id = 1;
  // "start" or "end"
  string status = 2;
  // Exit code of the step command (only meaningful when status = "end").
  int32 exit_code = 3;
}

// LogLine carries a single line of step output.
message LogLine {
  int32 step_id = 1;
  // "stdout" or "stderr"
  string stream = 2;
  string content = 3;
}

// ArtifactChunk streams a file from runner to operator in chunks.
// The runner sends one or more chunks per file, with eof=true on the last chunk.
message ArtifactChunk {
  // Relative path within the artifacts directory (e.g., "bin/myapp").
  string path = 1;
  bytes data = 2;
  bool eof = 3;
}

// ArtifactData streams artifact files from operator to runner (for final jobs).
// The operator sends collected artifacts from matrix legs to the final runner.
message ArtifactData {
  // Architecture of the source matrix leg (e.g., "amd64").
  string source_architecture = 1;
  // Relative path within the artifacts directory.
  string path = 2;
  bytes data = 3;
  bool eof = 4;
}

// Ack acknowledges receipt of a runner event. Intentionally empty: the
// message itself is the acknowledgement; fields may be added in the future.
message Ack {}