···11-# Build the manager binary only
11+# Build the manager and agent binaries
22FROM golang:1.24-alpine AS builder
33ARG TARGETOS
44ARG TARGETARCH
···1111# and so that source changes don't invalidate our downloaded layer
1212RUN go mod download
13131414-COPY cmd/manager cmd/manager
1414+COPY cmd/ cmd/
1515COPY api/ api/
1616COPY internal/ internal/
1717···2020# the docker BUILDPLATFORM arg will be linux/arm64 when for Apple x86 it will be linux/amd64. Therefore,
2121# by leaving it empty we can ensure that the container and binary shipped on it will have the same platform.
2222RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/manager/main.go
2323+RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o agent cmd/agent/main.go
23242425FROM alpine:3.22 AS base
2526···55565657WORKDIR /
5758COPY --from=builder /workspace/manager .
5959+COPY --from=builder /workspace/agent .
5860USER 65532:65532
59616062ENTRYPOINT ["/manager"]
···167167##@ Build
168168169169.PHONY: build
170170-build: manifests generate fmt vet ## Build manager and discovery binaries.
170170+build: manifests generate fmt vet ## Build manager, discovery, and agent binaries.
171171 go build -o bin/manager cmd/manager/main.go
172172 go build -o bin/discovery cmd/discovery/main.go
173173+ go build -o bin/agent cmd/agent/main.go
173174174175.PHONY: run
175176run: manifests generate fmt vet ## Run a controller from your host.
+162
cmd/agent/main.go
···11+/*
22+Copyright 2025.
33+44+Licensed under the Apache License, Version 2.0 (the "License");
55+you may not use this file except in compliance with the License.
66+You may obtain a copy of the License at
77+88+ http://www.apache.org/licenses/LICENSE-2.0
99+1010+Unless required by applicable law or agreed to in writing, software
1111+distributed under the License is distributed on an "AS IS" BASIS,
1212+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313+See the License for the specific language governing permissions and
1414+limitations under the License.
1515+*/
1616+1717+package main
1818+1919+import (
2020+ "context"
2121+ "flag"
2222+ "fmt"
2323+ "os"
2424+ "os/signal"
2525+ "syscall"
2626+ "time"
2727+2828+ ctrl "sigs.k8s.io/controller-runtime"
2929+ "sigs.k8s.io/controller-runtime/pkg/log/zap"
3030+3131+ "github.com/evanjarrett/hsm-secrets-operator/internal/agent"
3232+ "github.com/evanjarrett/hsm-secrets-operator/internal/hsm"
3333+)
3434+3535+var (
3636+ setupLog = ctrl.Log.WithName("agent")
3737+)
3838+3939+func main() {
4040+ var deviceName string
4141+ var port int
4242+ var healthPort int
4343+ var pkcs11LibraryPath string
4444+ var slotID int
4545+ var tokenLabel string
4646+ var pin string
4747+4848+ flag.StringVar(&deviceName, "device-name", "", "Name of the HSM device this agent serves")
4949+ flag.IntVar(&port, "port", 8092, "Port for the HSM agent API")
5050+ flag.IntVar(&healthPort, "health-port", 8093, "Port for health checks")
5151+ flag.StringVar(&pkcs11LibraryPath, "pkcs11-library", "", "Path to PKCS#11 library")
5252+ flag.IntVar(&slotID, "slot-id", 0, "PKCS#11 slot ID")
5353+ flag.StringVar(&tokenLabel, "token-label", "", "PKCS#11 token label")
5454+ flag.StringVar(&pin, "pin", "", "PKCS#11 PIN (use environment variable HSM_PIN for security)")
5555+5656+ opts := zap.Options{
5757+ Development: true,
5858+ }
5959+ opts.BindFlags(flag.CommandLine)
6060+ flag.Parse()
6161+6262+ ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
6363+6464+ // Validate required parameters
6565+ if deviceName == "" {
6666+ deviceName = os.Getenv("HSM_DEVICE_NAME")
6767+ if deviceName == "" {
6868+ setupLog.Error(fmt.Errorf("device name required"), "Device name must be provided via --device-name or HSM_DEVICE_NAME environment variable")
6969+ os.Exit(1)
7070+ }
7171+ }
7272+7373+ // Get configuration from environment variables if not provided via flags
7474+ if pkcs11LibraryPath == "" {
7575+ pkcs11LibraryPath = os.Getenv("PKCS11_LIBRARY_PATH")
7676+ }
7777+ if tokenLabel == "" {
7878+ tokenLabel = os.Getenv("PKCS11_TOKEN_LABEL")
7979+ }
8080+ if pin == "" {
8181+ pin = os.Getenv("PKCS11_PIN")
8282+ }
8383+8484+ setupLog.Info("Starting HSM agent",
8585+ "device", deviceName,
8686+ "port", port,
8787+ "health-port", healthPort,
8888+ "pkcs11-library", pkcs11LibraryPath,
8989+ "slot-id", slotID,
9090+ "token-label", tokenLabel,
9191+ )
9292+9393+ // Create HSM client
9494+ var hsmClient hsm.Client
9595+9696+ if pkcs11LibraryPath != "" {
9797+ // Create PKCS#11 client for production use
9898+ config := hsm.Config{
9999+ PKCS11LibraryPath: pkcs11LibraryPath,
100100+ SlotID: uint(slotID),
101101+ PIN: pin,
102102+ TokenLabel: tokenLabel,
103103+ ConnectionTimeout: 30 * time.Second,
104104+ RetryAttempts: 3,
105105+ RetryDelay: 2 * time.Second,
106106+ }
107107+108108+ hsmClient = hsm.NewPKCS11Client()
109109+ ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
110110+ defer cancel()
111111+112112+ if err := hsmClient.Initialize(ctx, config); err != nil {
113113+ setupLog.Error(err, "Failed to initialize PKCS#11 client")
114114+ os.Exit(1)
115115+ }
116116+ } else {
117117+ // Use mock client for testing
118118+ setupLog.Info("No PKCS#11 library specified, using mock client")
119119+ hsmClient = hsm.NewMockClient()
120120+121121+ ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
122122+ defer cancel()
123123+124124+ if err := hsmClient.Initialize(ctx, hsm.DefaultConfig()); err != nil {
125125+ setupLog.Error(err, "Failed to initialize mock client")
126126+ os.Exit(1)
127127+ }
128128+ }
129129+130130+ // Create agent server
131131+ server := agent.NewServer(hsmClient, deviceName, port, healthPort, setupLog)
132132+133133+ // Setup graceful shutdown
134134+ ctx, cancel := context.WithCancel(context.Background())
135135+ defer cancel()
136136+137137+ // Handle shutdown signals
138138+ sigChan := make(chan os.Signal, 1)
139139+ signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
140140+141141+ go func() {
142142+ sig := <-sigChan
143143+ setupLog.Info("Received signal, shutting down", "signal", sig)
144144+ cancel()
145145+ }()
146146+147147+ // Start server
148148+ setupLog.Info("HSM agent ready", "device", deviceName)
149149+150150+ if err := server.Start(ctx); err != nil {
151151+ setupLog.Error(err, "Server failed")
152152+ os.Exit(1)
153153+ }
154154+155155+ // Cleanup
156156+ setupLog.Info("Closing HSM client")
157157+ if err := hsmClient.Close(); err != nil {
158158+ setupLog.Error(err, "Failed to close HSM client")
159159+ }
160160+161161+ setupLog.Info("HSM agent shutdown complete")
162162+}
+9
cmd/manager/main.go
···3939 "sigs.k8s.io/controller-runtime/pkg/webhook"
40404141 hsmv1alpha1 "github.com/evanjarrett/hsm-secrets-operator/api/v1alpha1"
4242+ "github.com/evanjarrett/hsm-secrets-operator/internal/agent"
4243 "github.com/evanjarrett/hsm-secrets-operator/internal/api"
4344 "github.com/evanjarrett/hsm-secrets-operator/internal/controller"
4445 "github.com/evanjarrett/hsm-secrets-operator/internal/discovery"
···240241 // Register the HSM client with the mirroring manager for this node
241242 mirroringManager.RegisterHSMClient(nodeName, hsmClient)
242243244244+ // Create agent manager
245245+ agentImage := os.Getenv("AGENT_IMAGE")
246246+ if agentImage == "" {
247247+ agentImage = "hsm-secrets-operator:latest" // Default to same image as manager
248248+ }
249249+ agentManager := agent.NewManager(mgr.GetClient(), agentImage, "hsm-secrets-operator-system")
250250+243251 if err := (&controller.HSMSecretReconciler{
244252 Client: mgr.GetClient(),
245253 Scheme: mgr.GetScheme(),
246254 HSMClient: hsmClient,
247255 MirroringManager: mirroringManager,
256256+ AgentManager: agentManager,
248257 }).SetupWithManager(mgr); err != nil {
249258 setupLog.Error(err, "unable to create controller", "controller", "HSMSecret")
250259 os.Exit(1)
···11+# HSM Agent Pod Architecture
22+33+This directory contains examples of how the HSM agent pod system works for distributed HSM operations.
44+55+## Architecture Overview
66+77+The HSM Secrets Operator now uses a **3-binary architecture**:
88+99+1. **Manager** (`/manager`) - Main operator that orchestrates everything
1010+2. **Discovery** (`/discovery`) - Lightweight USB device discovery (DaemonSet)
1111+3. **Agent** (`/agent`) - HSM operation execution pods (deployed on-demand)
1212+1313+## How It Works
1414+1515+```mermaid
1616+graph TB
1717+ subgraph "Manager Pod (Any Node)"
1818+ M[Manager Controller]
1919+ end
2020+2121+ subgraph "Node with HSM Hardware"
2222+ D[Discovery Pod<br/>DaemonSet]
2323+ A[Agent Pod<br/>On-demand]
2424+ H[HSM Hardware]
2525+ end
2626+2727+ subgraph "Other Nodes"
2828+ D2[Discovery Pod<br/>DaemonSet]
2929+ end
3030+3131+ M -->|1. Find HSMDevice| D
3232+ M -->|2. Deploy Agent| A
3333+ M -->|3. HTTP API calls| A
3434+ A -->|4. PKCS#11| H
3535+ D -->|USB discovery| H
3636+```
3737+3838+## Process Flow
3939+4040+1. **HSMSecret Created**: User creates an HSMSecret resource
4141+2. **Device Discovery**: Manager finds available HSMDevice (discovered by DaemonSet)
4242+3. **Agent Deployment**: Manager deploys HSM agent pod on node with hardware
4343+4. **Node Affinity**: Agent pod pinned to specific node via `kubernetes.io/hostname`
4444+5. **HTTP Communication**: Manager makes HTTP calls to agent for HSM operations
4545+6. **Hardware Access**: Agent executes PKCS#11 operations locally on HSM
4646+4747+## Key Benefits
- ✅ **Remote Execution**: The manager can run on any node; agents run where the hardware exists
- ✅ **Resource Efficiency**: Agents are only deployed when HSMSecrets exist
- ✅ **Auto-cleanup**: Agents are removed when no longer needed
- ✅ **Node Targeting**: Agents are placed on the right node via HSMDevice discovery
- ✅ **Clean Architecture**: Each component has a single responsibility
5454+5555+## Example Deployment
5656+5757+See `agent-example.yaml` for a complete example of:
5858+- HSMDevice discovery configuration
5959+- HSMSecret that triggers agent deployment
6060+- Secret with PIN configuration
6161+6262+## Agent Pod Configuration
6363+6464+Agent pods are automatically configured with:
6565+- **Environment Variables**: PKCS#11 library path, slot ID, token label
6666+- **Secrets**: PIN from Kubernetes Secret reference
6767+- **Node Affinity**: Pinned to node with actual hardware
6868+- **Security**: Non-privileged execution with proper security contexts
6969+- **Health Checks**: Liveness and readiness probes
7070+- **Resources**: CPU/memory limits and requests
7171+7272+## API Endpoints
7373+7474+Each agent exposes:
7575+- `GET /api/v1/hsm/info` - HSM device information
7676+- `GET /api/v1/hsm/secrets/{path}` - Read secret
7777+- `POST /api/v1/hsm/secrets/{path}` - Write secret
7878+- `DELETE /api/v1/hsm/secrets/{path}` - Delete secret
7979+- `GET /api/v1/hsm/secrets` - List secrets
8080+- `GET /api/v1/hsm/checksum/{path}` - Get checksum
8181+- `GET /healthz` - Health check
8282+- `GET /readyz` - Readiness check